# Table of Contents
 <p>

In [1]:
import pandas as pd
import numpy as np
%matplotlib inline

# 1. Create a dataframe from another object

## 1.1 Dictionary

In [2]:
# Build the frame from a {column name: values} mapping; `columns` pins the
# left-to-right column order explicitly.
df = pd.DataFrame(
    {
        'id': [100, 101, 102],
        'color': ['red', 'green', 'blue'],
    },
    columns=['id', 'color'],
)
df

Unnamed: 0,id,color
0,100,red
1,101,green
2,102,blue


## 1.2 List

In [3]:
# Each inner list is one row; the column labels are supplied separately.
pd.DataFrame(
    data=[[100, 'red'], [101, 'green'], [102, 'blue']],
    columns=['id', 'color'],
)

Unnamed: 0,id,color
0,100,red
1,101,green
2,102,blue


## 1.3 NumPy array

In [4]:
# Draw the 4x2 array of uniform [0, 1) samples from a locally seeded generator
# so the values (and every output derived from them) are the same on each
# Restart & Run All, without touching NumPy's global random state.
arr = np.random.RandomState(0).rand(4, 2)
arr

array([[ 0.156243  ,  0.36793182],
       [ 0.41683384,  0.43149743],
       [ 0.56204018,  0.03731216],
       [ 0.22503991,  0.50787615]])

In [5]:
# Wrap the 2-D array in a frame, labelling its two columns.
pd.DataFrame(data=arr, columns=['one', 'two'])

Unnamed: 0,one,two
0,0.156243,0.367932
1,0.416834,0.431497
2,0.56204,0.037312
3,0.22504,0.507876


In [6]:
# np.arange supplies ten consecutive student ids (100..109). The scores are
# integers in [60, 100] drawn from a locally seeded generator, so the table is
# identical on every run.
scores = pd.DataFrame(
    {
        'student': np.arange(100, 110, 1),
        'score': np.random.RandomState(0).randint(60, 101, 10),
    },
    columns=['student', 'score'],
)
scores

Unnamed: 0,student,score
0,100,74
1,101,89
2,102,80
3,103,90
4,104,76
5,105,90
6,106,83
7,107,60
8,108,93
9,109,88


## 1.4 Series

In [7]:
# A named Series; the name is used as the column label when it is combined
# with a DataFrame.
s = pd.Series(
    data=['round', 'square', 'triangle'],
    name='shape',
)
s

0       round
1      square
2    triangle
Name: shape, dtype: object

In [8]:
# Place the Series beside df as an extra column; rows are matched on their
# index labels, not on their position.
pd.concat([df, s], axis='columns')

Unnamed: 0,id,color,shape
0,100,red,round
1,101,green,square
2,102,blue,triangle


# 2. Apply, map, applymap

## 2.1 map

In [9]:
# Download the training CSV via its short link and preview the first rows.
train_url = 'http://bit.ly/kaggletrain'
train = pd.read_csv(train_url)
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [10]:
# Series.map replaces each value with its entry in the lookup dictionary.
sex_to_num = {'male': 1, 'female': 0}
train['Sex_num'] = train['Sex'].map(sex_to_num)
train.loc[0:4, ['Sex', 'Sex_num']]

Unnamed: 0,Sex,Sex_num
0,male,1
1,female,0
2,female,0
3,female,0
4,male,1


## 2.2 apply

In [11]:
# Series.apply calls the function (here the built-in len) once per name.
train['Name_length'] = train['Name'].apply(len)
train.loc[0:4, ['Name', 'Name_length']]

Unnamed: 0,Name,Name_length
0,"Braund, Mr. Owen Harris",23
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",51
2,"Heikkinen, Miss. Laina",22
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",44
4,"Allen, Mr. William Henry",24


In [12]:
# Round every fare up to the next whole number by applying np.ceil.
train['Fare_ceil'] = train['Fare'].apply(np.ceil)
train.loc[0:4, ['Fare', 'Fare_ceil']]

Unnamed: 0,Fare,Fare_ceil
0,7.25,8.0
1,71.2833,72.0
2,7.925,8.0
3,53.1,54.0
4,8.05,9.0


In [13]:
def get_element(mylist, position):
    """Return the item stored at ``position`` in ``mylist``.

    ``mylist`` is indexed with ``position`` directly, so anything that
    supports ``[]`` works, and any indexing error propagates to the caller.
    """
    element = mylist[position]
    return element

In [14]:
# Extra keyword arguments given to Series.apply are forwarded to the function,
# so every split name is passed to get_element(..., position=0).
name_parts = train['Name'].str.split(',')
name_parts.apply(get_element, position=0).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [15]:
# Same extraction of the text before the first comma, using an inline lambda
# instead of a named helper.
train['Name'].str.split(',').apply(lambda parts: parts[0]).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [16]:
# Download the drinks-by-country CSV via its short link and preview the first rows.
drinks_url = 'http://bit.ly/drinksbycountry'
drinks = pd.read_csv(drinks_url)
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [17]:
# DataFrame.apply along the index hands each column to max, giving one
# maximum per servings column.
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis='index')

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [18]:
# Row label of each column's maximum. pd.Series.idxmax always returns the index
# label, whereas np.argmax on a Series returns a label or an integer position
# depending on the pandas version; labels are what a .loc lookup needs.
drinks.loc[:, 'beer_servings':'wine_servings'].apply(pd.Series.idxmax, axis=0)

beer_servings      117
spirit_servings     68
wine_servings       61
dtype: int64

In [19]:
# Look the row label up instead of hard-coding 117 from the previous output,
# so this check stays valid if the downloaded data ever changes.
top_beer_label = drinks['beer_servings'].idxmax()
drinks.loc[top_beer_label, 'beer_servings']

376

## 2.3 applymap

In [20]:
# applymap calls the function on every individual cell of the frame.
# NOTE(review): pandas >= 2.1 deprecates DataFrame.applymap in favour of
# DataFrame.map — switch once older pandas no longer needs to be supported.
(
    drinks
    .loc[:, 'beer_servings':'wine_servings']
    .applymap(float)
    .head()
)

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0.0,0.0,0.0
1,89.0,132.0,54.0
2,25.0,0.0,14.0
3,245.0,138.0,312.0
4,217.0,57.0,45.0
