 # Simple Pandas Examples

In [None]:
import matplotlib.pyplot as plt # For creating the simple plot
import pandas as pd
%matplotlib inline

In [None]:
# Read the zoo data from the CSV file into a DataFrame.
# Ending the cell with a bare `df` makes the notebook display it.
file = "Resources/zoo.csv"
df = pd.read_csv(file)
df

In [None]:
# Count the non-missing items in each column. The count "should" be the
# same for all columns, but notice that they are not the same.
# This means there is bad data somewhere that we need to clean.
df.count()

In [None]:
# Remove any rows that contain a NaN ("Not a Number", which is how
# pandas marks a missing value)
df = df.dropna()
df

In [None]:
# Look at the averages (the "mean" row). Do they seem reasonable?
# No! The average is really large, so there must be a
# really large number somewhere.
df.describe()

In [None]:
# The water_need average is still high. Keep only the rows whose
# water_need is inside a reasonable range: say, below 10000.
df = df.loc[df['water_need'] < 10000]
df

In [None]:
# Now use describe() again and check that everything looks OK:
df.describe()

 # Great! Now that the data is clean, we can continue...

In [None]:
# Get a list of all the animals.
# Note that df['animal'] returns a Pandas Series, which has to be
# turned into a Python list with tolist().
animals_series = df['animal']
animals_list = animals_series.tolist()
animals_list

In [None]:
# Get a list of all the animals, this time using attribute access.
# Note that df['animal'] returns a Pandas Series, which has to be
# turned into a Python list with tolist().
# Note that df['animal'] is the same as df.animal
animals_series = df.animal
animals_list = animals_series.tolist()
animals_list

In [None]:
# Note that df['animal'] returns a Pandas Series, which has to be
# turned into a Python list with tolist().
# Note that df['animal'] is the same as df.animal
# Note that you can shorten this to one line by chaining the calls
animals_list = df.animal.tolist()
animals_list

In [None]:
# Loop through the list of animals and print each one on its own line
for animal in animals_list:
    print(animal)

In [None]:
# Use the Pandas unique() function to get the distinct animals, then
# tolist() to turn the result into a Python list.
df.animal.unique().tolist()

In [None]:
# Get the number of different animals. Use len() on the unique values.
len(df.animal.unique())

In [None]:
# Get a count of gender: group the rows by gender, then count the
# animal entries in each group.
df.groupby('gender')[['animal']].count()

In [None]:
# Let's read in the CSV file again and clean it in one go:
# drop the rows with missing values, then keep only the rows whose
# water_need is below 10000.
file = "Resources/zoo.csv"
df = (
    pd.read_csv(file)
    .dropna()
    .loc[lambda frame: frame['water_need'] < 10000]
)
df

In [None]:
# Draw a bar chart! Animals go on the x axis, water_need on the y axis.
df.plot.bar(x='animal', y='water_need', color='blue')
# Optional styling settings (left disabled):
#plt.rcParams['font.size'] = 14
#plt.rcParams['figure.figsize'] = (10,4)
plt.xlabel("Animals")
plt.ylabel("Water Needs")
plt.title("Animal Water Needs")

plt.show()

In [None]:
# Get the average water need by animal: group the rows by animal and
# take the mean of each group's water_need values.
water_need_mean_by_animal = df.groupby('animal')[['water_need']].mean()
water_need_mean_by_animal

In [None]:
# Draw a bar chart again, but this time of the averages by animal.
# Note that the Pandas plot documentation at
#   https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.bar.html
#   and
#   https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.html#pandas.DataFrame.plot
# says the xlabel, ylabel, and legend can be given as parameters of the
# plotting call, but doing so gave an error here.
# NOTE(review): the xlabel/ylabel keywords were added in pandas 1.1, so
# an older installed pandas would explain the error - TODO confirm.
# So, these are set after the bar plot is created.
water_need_mean_by_animal.plot(kind='bar', rot=0, figsize=(10, 4), fontsize=12)
plt.xlabel("Animals", fontsize=14)
plt.ylabel("Water Needs (Average)", fontsize=14)
plt.title("Average Animal Water Needs", fontsize=24)
# Change the legend (the box in the upper right corner). Note that
# this needs to be a Python list (enclosed in []) because you can
# potentially have more than one entry. Yep - that is not intuitive.
plt.legend(["Water need (average)"], fontsize=8)

plt.show()