In [1]:
import pandas as pd
# Location of the drinks CSV used throughout the first half of the notebook.
url = 'https://raw.githubusercontent.com/justmarkham/pandas-videos/master/data/drinks.csv'

# Load with pandas' default dtype inference, then preview the first five rows.
drinks = pd.read_csv(filepath_or_buffer=url)
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [2]:
# Show the dtype of every column in `drinks` as loaded by read_csv.
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [4]:
# Method 1: convert a column's dtype after the CSV has been loaded.
# Bracket access is used for the assignment so it is unambiguous that an
# existing column is being replaced.
drinks['beer_servings'] = drinks['beer_servings'].astype(float)
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0.0,0,0,0.0,Asia
1,Albania,89.0,132,54,4.9,Europe
2,Algeria,25.0,0,14,0.7,Africa
3,Andorra,245.0,138,312,12.4,Europe
4,Angola,217.0,57,45,5.9,Africa


In [5]:
# Re-check the column dtypes after the astype(float) conversion of beer_servings.
drinks.dtypes

country                          object
beer_servings                   float64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [8]:
# Method 2: declare the column's dtype while the CSV is being read,
# so wine_servings is parsed as float from the start.
# Note: this re-reads the file and rebinds `drinks`.
column_types = {'wine_servings': float}
drinks = pd.read_csv(url, dtype=column_types)
del column_types  # keep the notebook namespace unchanged
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                   float64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [10]:
# Location of the tab-separated Chipotle orders file.
url2 = 'https://raw.githubusercontent.com/justmarkham/pandas-videos/master/data/chipotle.tsv'

# read_table is read_csv with a tab delimiter; spell the delimiter out explicitly.
orders = pd.read_csv(url2, sep='\t')
orders.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [11]:
# Show the dtype of every column in `orders` as loaded.
orders.dtypes

order_id               int64
quantity               int64
item_name             object
choice_description    object
item_price            object
dtype: object

In [30]:
# Check that the value stored in the first row of item_name is a Python str.
isinstance(orders.loc[0, 'item_name'], str)

True

In [29]:
# Display the concrete Python type of the first item_name value.
type(orders.loc[0, 'item_name'])

str

In [25]:
# pandas does not parse item_price as a number because every value carries a
# '$' prefix (e.g. '$2.39'), so the column is loaded as strings.
# Strip the symbol with the .str accessor, then convert to float.
#
# regex=False: '$' is the regex end-of-string anchor, and the default value of
# `regex` differs between pandas versions, so request a literal match explicitly.
#
# The dtype guard makes this cell safe to re-run: once the column is numeric,
# the .str accessor is no longer available and would raise AttributeError.
if not pd.api.types.is_numeric_dtype(orders['item_price']):
    orders['item_price'] = (
        orders['item_price']
        .str.replace('$', '', regex=False)
        .astype(float)
    )

# Confirm the conversion: item_price should now be float64.
orders.dtypes

order_id                int64
quantity                int64
item_name              object
choice_description     object
item_price            float64
dtype: object

In [31]:
# With item_price numeric, aggregate statistics such as the mean are available.
orders['item_price'].mean()

7.464335785374397

In [32]:
# Flag, row by row, whether item_name contains the substring 'Chicken'
# (yields a boolean Series; only the first five rows are shown).
orders['item_name'].str.contains('Chicken').head()

0    False
1    False
2    False
3    False
4     True
Name: item_name, dtype: bool

In [33]:
# Same substring test, with the booleans cast to integers (True -> 1, False -> 0).
orders['item_name'].str.contains('Chicken').astype(int).head()

0    0
1    0
2    0
3    0
4    1
Name: item_name, dtype: int64