# Pandas library


In [3]:
#import libraries
import numpy as np
import pandas as pd

## Working with Pandas Series

In [2]:
# Build a Series from a plain Python list.
# No index is given, so pandas assigns the default integer index 0..4.
days = pd.Series(
    data=[
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
    ]
)
days

0       Monday
1      Tuesday
2    Wednesday
3     Thursday
4       Friday
dtype: object

In [5]:
# Build a Series from a NumPy array instead of a Python list.
# The array values become the Series values; the index is again the default 0..n-1.
days_list = np.array(
    ['Monday', 'Tuesday', 'Wednesday']
)
numpy_series = pd.Series(data=days_list)
print(numpy_series)

0       Monday
1      Tuesday
2    Wednesday
dtype: object


In [6]:
# Give the Series an explicit string index instead of the default integers.
# Each label in `index` is paired with the value at the same position in `data`.
days = pd.Series(
    data=['Monday', 'Tuesday', 'Wednesday'],
    index=['a', 'b', 'c'],
)
print(days)

a       Monday
b      Tuesday
c    Wednesday
dtype: object


In [8]:
# Build a Series from a dictionary (written with '{}' rather than '[]').
# The dictionary keys become the index labels and the dictionary values become the data.
days1 = pd.Series(
    data={
        'a': 'Monday',
        'b': 'Tuesday',
        'c': 'Wednesday',
    }
)
print(days1)

a       Monday
b      Tuesday
c    Wednesday
dtype: object


In [9]:
# Look up a single element of the Series by its index label ('c').
days1['c']

'Wednesday'

In [10]:
# Look up a single element of the Series by integer position (0 = first element).
# `days` carries a string index ('a', 'b', 'c'), so the position has to go through .iloc:
# plain days[0] depends on a positional fallback for integer keys that pandas has
# deprecated, and that newer versions treat as a (missing) label instead.
days.iloc[0]

'Monday'

In [11]:
# Look up a single element of the string-indexed Series by its label ('b').
days['b']

'Tuesday'

## Working with Pandas DataFrame

In [13]:
# Calling the DataFrame constructor without any data yields an empty frame:
# no columns and no rows.
empty_df = pd.DataFrame(data=None)
print(empty_df)

Empty DataFrame
Columns: []
Index: []


In [14]:
# Build a DataFrame from a dictionary of equal-length lists.
# Each key becomes a column name and each list supplies that column's values, row by row.
df_dict = {
    'Country': ['Ghana', 'Nigeria', 'Kenya', 'Togo'],
    'Capital': ['Accra', 'Abuja', 'Nairobi', 'Lome'],
    'Population': [12000, 10000, 34000, 45000],
    'Age': [60, 70, 80, 75],
}
# Custom row labels (2, 4, 6, 8) replace the default 0..3 index.
df = pd.DataFrame(data=df_dict, index=[2, 4, 6, 8])
print(df)

   Country  Capital  Population  Age
2    Ghana    Accra       12000   60
4  Nigeria    Abuja       10000   70
6    Kenya  Nairobi       34000   80
8     Togo     Lome       45000   75


In [15]:
# Build the same table from a list of rows.
# Each inner list is one row, so the column names have to be supplied separately.
df_list = [
    ['Ghana', 'Accra', 12000, 60],
    ['Nigeria', 'Abuja', 10000, 70],
    ['Kenya', 'Nairobi', 34000, 80],
    ['Togo', 'Lome', 45000, 75],
]
df_1 = pd.DataFrame(
    data=df_list,
    index=[2, 4, 6, 8],
    columns=['Country', 'Capital', 'Population', 'Age'],
)
print(df_1)

   Country  Capital  Population  Age
2    Ghana    Accra       12000   60
4  Nigeria    Abuja       10000   70
6    Kenya  Nairobi       34000   80
8     Togo     Lome       45000   75


### Slicing and Indexing with DataFrame
#### Using iloc, loc, iat and at

In [16]:
# iloc selects by integer position: 3 is the fourth row of df (the one labelled 8),
# returned as a Series whose index is the column names.
df.iloc[3]

Country        Togo
Capital        Lome
Population    45000
Age              75
Name: 8, dtype: object

In [17]:
# loc selects by index label: 4 is the row labelled 4 (the second row of df),
# not the row at position 4.
df.loc[4]

Country       Nigeria
Capital         Abuja
Population      10000
Age                70
Name: 4, dtype: object

In [18]:
# Square brackets with a column name return that whole column as a Series.
df_1['Country']

2      Ghana
4    Nigeria
6      Kenya
8       Togo
Name: Country, dtype: object

In [19]:
# iat reads a single value by integer position: row position 1, column position 0.
df_1.iat[1, 0]

'Nigeria'

In [20]:
# at reads a single value by label: row label 4, column label 'Country'.
df_1.at[4, 'Country']

'Nigeria'

## Summary and Descriptive Statistics with DataFrame

In [21]:
# Total population across all countries: select the 'Population' column, then add it up.
df.loc[:, 'Population'].sum()

101000

In [22]:
# Mean of the numeric columns (Population, Age).
# df also holds text columns (Country, Capital). numeric_only=True excludes them
# explicitly; without it, recent pandas versions raise a TypeError instead of
# silently skipping the non-numeric columns.
df.mean(numeric_only=True)

Population    25250.00
Age              71.25
dtype: float64

In [23]:
# describe() outputs summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns of df.
df.describe()

Unnamed: 0,Population,Age
count,4.0,4.0
mean,25250.0,71.25
std,17075.811352,8.539126
min,10000.0,60.0
25%,11500.0,67.5
50%,23000.0,72.5
75%,36750.0,76.25
max,45000.0,80.0


## Dealing with missing values in DataFrames

In [27]:
# Build a DataFrame that deliberately contains missing values.
# np.nan marks each missing cell: one in Name, one in Experience, one in Height.
df_dict2 = {
    'Name': ['Arthur', 'James', 'Caroline', np.nan],
    'Profession': ['Seeker', 'Writer', 'Doctor', 'Teacher'],
    'Experience': [12, np.nan, 10, 8],
    'Height': [np.nan, 160, 155, 125],
}
df2 = pd.DataFrame(data=df_dict2)
print(df2)

       Name Profession  Experience  Height
0    Arthur     Seeker        12.0     NaN
1     James     Writer         NaN   160.0
2  Caroline     Doctor        10.0   155.0
3       NaN    Teacher         8.0   125.0


In [28]:
# isnull() returns a DataFrame of booleans with the same shape as df2:
# True where a cell is missing, False everywhere else.
df2.isnull()

Unnamed: 0,Name,Profession,Experience,Height
0,False,False,False,True
1,False,False,True,False
2,False,False,False,False
3,True,False,False,False


In [29]:
# dropna() returns a new DataFrame without the rows that contain any missing value.
# The result is not assigned here, so df2 itself is left unchanged.
df2.dropna()

Unnamed: 0,Name,Profession,Experience,Height
2,Caroline,Doctor,10.0,155.0
