### Dataframes

In [9]:
# Pure-Python stand-in for a DataFrame: one shared list of index labels plus
# a list of named columns, each keeping its values in index order.
df = {
    'index': [0, 1, 2],
    'cols': [
        {'name': 'growth', 'data': [.5, .7, 1.2]},
        {'name': 'Name', 'data': ['Paul', 'George', 'Ringo']},
    ],
}

In [10]:
def get_row(df, idx):
    """Return the values stored under index label ``idx``, one per column.

    ``df`` is the dict-based frame: ``df['index']`` lists the row labels and
    every entry of ``df['cols']`` keeps its values under the ``'data'`` key.
    Raises ValueError (from ``list.index``) when ``idx`` is not a label.
    """
    # Translate the label into a list position once, then read every column there.
    position = df['index'].index(idx)
    return [column['data'][position] for column in df['cols']]

In [11]:
get_row(df,1)

[0.7, 'George']

In [12]:
# Same table in pandas: every dict key is a column label, every list its values.
import pandas as pd

df = pd.DataFrame(
    data={
        'growth': [.5, .7, 1.2],
        'Name': ['Paul', 'George', 'Ringo'],
    }
)

In [13]:
df

Unnamed: 0,growth,Name
0,0.5,Paul
1,0.7,George
2,1.2,Ringo


In [14]:
df.iloc[2]

growth      1.2
Name      Ringo
Name: 2, dtype: object

In [15]:
df['Name']

0      Paul
1    George
2     Ringo
Name: Name, dtype: object

In [16]:
df['Name'].str.lower()

0      paul
1    george
2     ringo
Name: Name, dtype: object

### A DataFrame can be constructed from
- columns
- rows
- CSV files
- NumPy ndarrays
- other sources: SQL, HDF5, Arrow, etc.

In [17]:
# dataframe from rows: each dict is one row, and its keys are the column labels
pd.DataFrame([
    {'growth':.5,'Name':'Paul'},
    {'growth':.7,'Name':'George'},
    {'growth':1.2,'Name':'Ringo'}
    ])

Unnamed: 0,growth,Name
0,0.5,Paul
1,0.7,George
2,1.2,Ringo


In [18]:
# from csv
from io import StringIO

# In-memory stand-in for a CSV file. The text is kept flush-left with no
# spaces around the commas: read_csv keeps padding after a delimiter by
# default, so a header written as "growth, Name" would create a column
# called ' Name' and padded values such as ' George'.
csv_file = StringIO("""growth,Name
.5,Paul
.7,George
1.2,Ringo""")

In [19]:
# read_csv consumes the StringIO buffer. Rewind it first so re-running this
# cell parses the same text again instead of raising EmptyDataError.
csv_file.seek(0)
pd.read_csv(csv_file)

Unnamed: 0,growth,Name
0,0.5,Paul
1,0.7,George
2,1.2,Ringo


In [20]:
import numpy as np
# Seed NumPy's global random state so the values below are the same on every run.
np.random.seed(42)
# 10 rows x 3 columns of randn draws, with the columns labelled 'a', 'b', 'c'.
pd.DataFrame(np.random.randn(10,3),
      columns=['a','b','c'])

Unnamed: 0,a,b,c
0,0.496714,-0.138264,0.647689
1,1.52303,-0.234153,-0.234137
2,1.579213,0.767435,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,1.465649
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


### DataFrame axes

In [21]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['growth', 'Name'], dtype='object')]

In [22]:
df

Unnamed: 0,growth,Name
0,0.5,Paul
1,0.7,George
2,1.2,Ringo


In [23]:
df.sum(axis=0)

growth                2.4
Name      PaulGeorgeRingo
dtype: object

In [24]:
# Summing across a row would add the float 'growth' to the str 'Name', which
# raises TypeError. Catch it and print the message so the error is still shown
# but "Run All" can continue past this cell.
try:
    row_sums = df.sum(axis=1)
except TypeError as err:
    row_sums = None
    print(f"TypeError: {err}")
row_sums

TypeError: unsupported operand type(s) for +: 'float' and 'str'

In [25]:
df.sum(axis='index')

growth                2.4
Name      PaulGeorgeRingo
dtype: object

In [26]:
# axis='columns' is the named form of axis=1: it sums across each row, which
# here would add the float 'growth' to the str 'Name' and raise TypeError.
# Catch it and print the message so "Run All" can continue past this cell.
try:
    row_sums = df.sum(axis='columns')
except TypeError as err:
    row_sums = None
    print(f"TypeError: {err}")
row_sums

TypeError: unsupported operand type(s) for +: 'float' and 'str'

In [27]:
df.axes[0]

RangeIndex(start=0, stop=3, step=1)

In [28]:
df.axes[1]

Index(['growth', 'Name'], dtype='object')

In [34]:
# The None in Score1 is stored as NaN, so that column holds floats while
# Score2 stays integer.
df = pd.DataFrame(
    {
        'Score1': [1, None],
        'Score2': [85, 90],
    }
)

In [35]:
df

Unnamed: 0,Score1,Score2
0,1.0,85
1,,90


In [36]:
df.axes

[RangeIndex(start=0, stop=2, step=1),
 Index(['Score1', 'Score2'], dtype='object')]

In [37]:
df.sum(axis=0)

Score1      1.0
Score2    175.0
dtype: float64

In [38]:
df.apply(np.sum, axis=0)

Score1      1.0
Score2    175.0
dtype: float64

In [39]:
df.sum(axis=1)

0    86.0
1    90.0
dtype: float64

### Exercise 16.7

In [44]:
# 1 Create a DataFrame with the names of your colleagues, their age, and title.
# Each tuple is one row; the labels in `columns` name its fields in order.
df = pd.DataFrame(
    [
        ('Bharti', 35, 'reporter'),
        ('Bhao', 61, 'Sr reporter'),
        ('Nameste', 67, 'author'),
    ],
    columns=['name', 'age', 'title'],
)

In [45]:
df

Unnamed: 0,name,age,title
0,Bharti,35,reporter
1,Bhao,61,Sr reporter
2,Nameste,67,author


In [47]:
# upper-case the values in the name column (str.upper, not str.capitalize);
# the result is a new Series and is not assigned back to df
df['name'].str.upper()

0     BHARTI
1       BHAO
2    NAMESTE
Name: name, dtype: object

In [48]:
# sum up the values of the age column: first inspect the axes to see the column labels
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['name', 'age', 'title'], dtype='object')]

In [50]:
total_age = df['age'].sum()
total_age

np.int64(163)