## Pandas basic tutorials

In [6]:
#importing both numpy and pandas
import pandas as pd
import numpy as np

### Pandas Series

In [7]:
# Population figures, expressed in millions. No index is supplied here, so
# pandas labels the seven entries with the default integers 0 through 6.
g7_pop = pd.Series(
    data=[
        43.434,
        46.464,
        21.678,
        67.890,
        12.307,
        18.898,
        11.987,
    ]
)
g7_pop  # final expression of the cell, so the Series is echoed below

0    43.434
1    46.464
2    21.678
3    67.890
4    12.307
5    18.898
6    11.987
dtype: float64

In [11]:
# Give the Series a name; it is printed in the footer of the Series repr.
g7_pop.name = "G7 population in millions"

# A notebook cell only echoes its final expression, so a bare `g7_pop.dtype`
# in the middle of the cell is evaluated and silently thrown away.
# display() renders it explicitly.
display(g7_pop.dtype)  # element type of the Series

# Underlying data as a NumPy array (echoed as the cell's last expression).
g7_pop.values

array([43.434, 46.464, 21.678, 67.89 , 12.307, 18.898, 11.987])

In [12]:
# With the default integer index, the label 1 identifies the second element.
g7_pop.loc[1]

46.464

In [13]:
# Inspect the index, i.e. the labels attached to each value of the Series.
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [14]:
# Assigning to .index relabels the existing Series in place: the default
# integer labels are replaced by country names, the values stay untouched.
g7_pop.index = ['Canada', 'France', 'Germany', 'Italy', 'Japan', 'UK', 'USA']
g7_pop

Canada     43.434
France     46.464
Germany    21.678
Italy      67.890
Japan      12.307
UK         18.898
USA        11.987
Name: G7 population in millions, dtype: float64

##### Creating a series from scratch

In [16]:
# Option 1: build a Series from a dict -- the keys become the index labels.
sample_from_dict = pd.Series(
    {
        'Canada': 34.333,
        'USA': 24.456,
    },
    name="Sample of g7 countries",
)

# Option 2: build a Series from a list of values plus an explicit index.
sample_from_lists = pd.Series(
    [34.444, 56.896],
    index=['Canada', 'Germany'],
    name="Sample G7 countries",
)

# A cell only echoes its final expression; without display() the dict-based
# Series would be constructed and then discarded without ever being shown.
display(sample_from_dict)
sample_from_lists

Canada     34.444
Germany    56.896
Name: Sample G7 countries, dtype: float64

## Indexing 

In [18]:
# Look up a single value by its index label.
g7_pop.loc['Canada']

43.434

In [17]:
# Echo the whole Series for reference before the positional examples below.
g7_pop

Canada     43.434
France     46.464
Germany    21.678
Italy      67.890
Japan      12.307
UK         18.898
USA        11.987
Name: G7 population in millions, dtype: float64

In [20]:
# .iloc selects by integer position instead of by label.
g7_pop.iloc[-1] # position -1 is the last element of the Series

11.987

In [23]:
# Multiple selection: passing a list of keys returns a sub-Series.
# display() is required for the first result because a cell only echoes its
# final expression; as a bare statement it would be computed and dropped.
display(g7_pop[['Italy', 'France']])  # selection by label

g7_pop.iloc[[0, 1, 3, -1]]  # selection by integer position; -1 is the last element

Canada    43.434
France    46.464
Italy     67.890
USA       11.987
Name: G7 population in millions, dtype: float64

In [24]:
# Slicing by label: the end label ('Italy') is included in the result.
g7_pop.loc['Canada':'Italy']

Canada     43.434
France     46.464
Germany    21.678
Italy      67.890
Name: G7 population in millions, dtype: float64

## Conditional selection (Boolean arrays)

In [25]:
# Echo the current Series so the filtered results below can be compared to it.
g7_pop

Canada     43.434
France     46.464
Germany    21.678
Italy      67.890
Japan      12.307
UK         18.898
USA        11.987
Name: G7 population in millions, dtype: float64

In [26]:
# Element-wise arithmetic: every value is multiplied by one million and a new
# Series is returned; g7_pop itself is not modified.
g7_pop.mul(1_000_000)

Canada     43434000.0
France     46464000.0
Germany    21678000.0
Italy      67890000.0
Japan      12307000.0
UK         18898000.0
USA        11987000.0
Name: G7 population in millions, dtype: float64

In [27]:
# Element-wise comparison: yields a boolean Series carrying the same index.
g7_pop.gt(30)

Canada      True
France      True
Germany    False
Italy       True
Japan      False
UK         False
USA        False
Name: G7 population in millions, dtype: bool

In [30]:
# Boolean selection: keep only the entries whose value is below 30.
g7_pop.loc[g7_pop.lt(30)]

Germany    21.678
Japan      12.307
UK         18.898
USA        11.987
Name: G7 population in millions, dtype: float64

In [31]:
# Arithmetic mean of all values in the Series, returned as a single scalar.
g7_pop.mean()

31.808285714285713

In [34]:
# NumPy functions apply element-wise to a Series and give back a Series with
# the same index; .pipe(np.log) is simply np.log called on g7_pop.
g7_pop.pipe(np.log)

Canada     3.771243
France     3.838678
Germany    3.076298
Italy      4.217889
Japan      2.510168
UK         2.939056
USA        2.483823
Name: G7 population in millions, dtype: float64

In [35]:
# Combine two boolean masks with & (element-wise AND): keep the entries that
# are strictly greater than 20 and strictly less than 30.
g7_pop.loc[g7_pop.gt(20) & g7_pop.lt(30)]

Germany    21.678
Name: G7 population in millions, dtype: float64

### Modifying Series

In [36]:
# Overwrite a single entry, addressed by its label; the Series is changed in place.
g7_pop.loc['Canada'] = 77.567
g7_pop

Canada     77.567
France     46.464
Germany    21.678
Italy      67.890
Japan      12.307
UK         18.898
USA        11.987
Name: G7 population in millions, dtype: float64

In [37]:
# Overwrite the last entry by position. The Series is float64, so the integer
# 300 is stored as 300.0.
g7_pop.iloc[-1] = 300
g7_pop

Canada      77.567
France      46.464
Germany     21.678
Italy       67.890
Japan       12.307
UK          18.898
USA        300.000
Name: G7 population in millions, dtype: float64

In [38]:
# Boolean-mask assignment: every entry greater than 200 is replaced in one step.
g7_pop.loc[g7_pop.gt(200)] = 47.789
g7_pop

Canada     77.567
France     46.464
Germany    21.678
Italy      67.890
Japan      12.307
UK         18.898
USA        47.789
Name: G7 population in millions, dtype: float64