In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Notebook-wide pandas display settings. Full option names are used on
# purpose: pandas resolves abbreviated names by pattern matching, which can
# become ambiguous (and raise) if a later release adds a similarly named option.

# Let printed frames use up to 200 characters per line before wrapping.
pd.set_option('display.width', 200)

# Center column headers in the text representation.
pd.set_option('display.colheader_justify', 'center')

# Never truncate the contents of a cell, however long the value is.
pd.set_option('display.max_colwidth', None)

In [2]:
# Load the two raw tables: sighting counts per park and per-species metadata.
obs_df = pd.read_csv("observations.csv")
spec_df = pd.read_csv("species_info.csv")

# Join the tables on the scientific name, order the rows by park, and renumber
# them from 0 so the index follows the sorted order.
# NOTE(review): the head() output shows the same species/park/observations
# value repeated with different common_names, which suggests species_info.csv
# has several rows per scientific_name and the join repeats observation
# counts. Confirm whether sums over `observations` should de-duplicate first.
merged = (
    pd.merge(obs_df, spec_df, on='scientific_name')
    .sort_values(by='park_name', ascending=True)
    .reset_index(drop=True)
)

# Rows without a conservation status get the placeholder label
# 'Not Endangered' (a working label, not an official category name).
merged['conservation_status'] = merged['conservation_status'].fillna('Not Endangered')

# Distinct park names, in order of first appearance in the sorted frame.
park_l = merged['park_name'].unique()

print(merged.head())

     scientific_name         park_name        observations     category                        common_names                    conservation_status
0       Elanus leucurus  Bryce National Park        71                Bird                                   White-Tailed Kite  Species of Concern
1  Leucanthemum vulgare  Bryce National Park        91      Vascular Plant  Oxeye Daisy, Ox-Eye Daisy, Oxeyedaisy, Oxeye-Daisy      Not Endangered
2        Rallus elegans  Bryce National Park        53                Bird                                           King Rail  Species of Concern
3  Leucanthemum vulgare  Bryce National Park       116      Vascular Plant                                               Daisy      Not Endangered
4  Leucanthemum vulgare  Bryce National Park       116      Vascular Plant  Oxeye Daisy, Ox-Eye Daisy, Oxeyedaisy, Oxeye-Daisy      Not Endangered


In [3]:
# One slice of `merged` per park, keyed by park name, kept so the aggregated
# numbers below can be checked against the underlying rows.
df_dict = {park: merged[merged.park_name == park] for park in park_l}

# For every park, the total number of observations per species category.
obspark_dict = {
    park: park_df.groupby('category').observations.sum()
    for park, park_df in df_dict.items()
}

# Show the category labels that index each per-park total (first park).
print(obspark_dict[park_l[0]].index)

# Disabled draft: a 2x2 grid of bar charts, one per park, of observations by
# category.
# NOTE(review): `park_l[x].title` below lacks call parentheses, so it would
# pass the method object rather than a string to plt.title -- fix before
# re-enabling.
# for x in range(len(park_l)):
#     ds = obspark_dict[park_l[x]]
#     ds_rl = range(len(ds))
#     ax = plt.subplot(2,2,x+1)
#     plt.bar(ds_rl,ds)
#     plt.title(park_l[x].title)
#     ax.set_xticks(ds_rl)
#     ax.set_xticklabels(ds.keys(),fontsize=5,rotation=90)
    

Index(['Amphibian', 'Bird', 'Fish', 'Mammal', 'Nonvascular Plant', 'Reptile', 'Vascular Plant'], dtype='object', name='category')
