In [1]:
import pandas as pd

In [2]:
cities = ['Philadelphia', 'Boston', 'Baltimore', 'Orlando']
states = ['PA', 'MA', 'MD', 'FD']
population = [1.5,0.6, 0.6, 2.35]
visitors = [41, 19, 24, 66]
list_labels = ["City", "State", "Population", "Visitors"]
list_cols = [cities, states, population, visitors]
zipped = list(zip(list_labels, list_cols))
zipped

[('City', ['Philadelphia', 'Boston', 'Baltimore', 'Orlando']),
 ('State', ['PA', 'MA', 'MD', 'FD']),
 ('Population', [1.5, 0.6, 0.6, 2.35]),
 ('Visitors', [41, 19, 24, 66])]

In [3]:
data = dict(zipped)
data

{'City': ['Philadelphia', 'Boston', 'Baltimore', 'Orlando'],
 'Population': [1.5, 0.6, 0.6, 2.35],
 'State': ['PA', 'MA', 'MD', 'FD'],
 'Visitors': [41, 19, 24, 66]}

In [4]:
df = pd.DataFrame(data)
df

Unnamed: 0,City,Population,State,Visitors
0,Philadelphia,1.5,PA,41
1,Boston,0.6,MA,19
2,Baltimore,0.6,MD,24
3,Orlando,2.35,FD,66


In [5]:
df.index = df['State']
df

Unnamed: 0_level_0,City,Population,State,Visitors
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PA,Philadelphia,1.5,PA,41
MA,Boston,0.6,MA,19
MD,Baltimore,0.6,MD,24
FD,Orlando,2.35,FD,66


In [6]:
for st, row in df.iterrows():
    print("st", st)
    print("row", row)

st PA
row City          Philadelphia
Population             1.5
State                   PA
Visitors                41
Name: PA, dtype: object
st MA
row City          Boston
Population       0.6
State             MA
Visitors          19
Name: MA, dtype: object
st MD
row City          Baltimore
Population          0.6
State                MD
Visitors             24
Name: MD, dtype: object
st FD
row City          Orlando
Population       2.35
State              FD
Visitors           66
Name: FD, dtype: object


In [7]:
for st, row in df.iterrows():
    print (st + ": " + row["City"])

PA: Philadelphia
MA: Boston
MD: Baltimore
FD: Orlando


In [8]:
for st, row in df.iterrows():
    df.loc[st, "city_length"] = len(row['City'])
df

Unnamed: 0_level_0,City,Population,State,Visitors,city_length
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
PA,Philadelphia,1.5,PA,41,12.0
MA,Boston,0.6,MA,19,6.0
MD,Baltimore,0.6,MD,24,9.0
FD,Orlando,2.35,FD,66,7.0


In [9]:
df["new_city_length"] = df["City"].apply(len)

In [10]:
df

Unnamed: 0_level_0,City,Population,State,Visitors,city_length,new_city_length
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PA,Philadelphia,1.5,PA,41,12.0,12
MA,Boston,0.6,MA,19,6.0,6
MD,Baltimore,0.6,MD,24,9.0,9
FD,Orlando,2.35,FD,66,7.0,7


In [11]:
df.new_city_length > 7

State
PA     True
MA    False
MD     True
FD    False
Name: new_city_length, dtype: bool

In [12]:
greater_than = df.new_city_length > 7
df[greater_than]

Unnamed: 0_level_0,City,Population,State,Visitors,city_length,new_city_length
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PA,Philadelphia,1.5,PA,41,12.0,12
MD,Baltimore,0.6,MD,24,9.0,9


In [13]:
df[(df.new_city_length > 7) & (df.new_city_length < 10)]

Unnamed: 0_level_0,City,Population,State,Visitors,city_length,new_city_length
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MD,Baltimore,0.6,MD,24,9.0,9


In [14]:
df[(df.new_city_length == 9) | (df.new_city_length >= 10)]

Unnamed: 0_level_0,City,Population,State,Visitors,city_length,new_city_length
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PA,Philadelphia,1.5,PA,41,12.0,12
MD,Baltimore,0.6,MD,24,9.0,9


In [15]:
df.City[df.new_city_length == 7]

State
FD    Orlando
Name: City, dtype: object

In [23]:
# df.Visitors[df.new_city_length == 6] *= 2
# df
df.Visitors[df.new_city_length == 6] *= 2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [17]:
def hundreds(n):
    return n/100
df['Visitors'].apply(hundreds)

State
PA    0.41
MA    0.38
MD    0.24
FD    0.66
Name: Visitors, dtype: float64

In [18]:
df['Visitors'].apply(lambda n: n/100)

State
PA    0.41
MA    0.38
MD    0.24
FD    0.66
Name: Visitors, dtype: float64

In [19]:
df['Visitors/hundreds'] = df.Visitors.apply(lambda n: n/100)
df

Unnamed: 0_level_0,City,Population,State,Visitors,city_length,new_city_length,Visitors/hundreds
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
PA,Philadelphia,1.5,PA,41,12.0,12,0.41
MA,Boston,0.6,MA,38,6.0,6,0.38
MD,Baltimore,0.6,MD,24,9.0,9,0.24
FD,Orlando,2.35,FD,66,7.0,7,0.66


In [20]:
df.City = df.City.str.upper()
df

Unnamed: 0_level_0,City,Population,State,Visitors,city_length,new_city_length,Visitors/hundreds
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
PA,PHILADELPHIA,1.5,PA,41,12.0,12,0.41
MA,BOSTON,0.6,MA,38,6.0,6,0.38
MD,BALTIMORE,0.6,MD,24,9.0,9,0.24
FD,ORLANDO,2.35,FD,66,7.0,7,0.66


In [21]:
df["Total_population"] = df.Population + df.Visitors
df

Unnamed: 0_level_0,City,Population,State,Visitors,city_length,new_city_length,Visitors/hundreds,Total_population
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
PA,PHILADELPHIA,1.5,PA,41,12.0,12,0.41,42.5
MA,BOSTON,0.6,MA,38,6.0,6,0.38,38.6
MD,BALTIMORE,0.6,MD,24,9.0,9,0.24,24.6
FD,ORLANDO,2.35,FD,66,7.0,7,0.66,68.35
