# DATA FRAMES

In [3]:
import pandas as pd

In [3]:
# One Series per shopper, keyed by item label. The two carts only partly
# overlap: both contain "bike" and "pants".
items = dict(
    camilo=pd.Series(data=[1, 2, 3], index=["bike", "pants", "bananas"]),
    carito=pd.Series(
        data=[40, 110, 500, 45],
        index=["book", "glassess", "bike", "pants"],
    ),
)

In [4]:
# Combine the per-shopper Series into one table: one column per shopper,
# one row per item label that appears in either cart.
shopping_carts = pd.DataFrame(data=items)
shopping_carts

Unnamed: 0,camilo,carito
bananas,3.0,
bike,1.0,500.0
book,,40.0
glassess,,110.0
pants,2.0,45.0


In [5]:
# Row labels (the index) of the DataFrame
shopping_carts.index

Index(['bananas', 'bike', 'book', 'glassess', 'pants'], dtype='object')

In [6]:
# The data as a 2-D NumPy array; missing entries appear as nan
shopping_carts.values

array([[  3.,  nan],
       [  1., 500.],
       [ nan,  40.],
       [ nan, 110.],
       [  2.,  45.]])

In [8]:
# Column labels of the DataFrame
shopping_carts.columns


Index(['camilo', 'carito'], dtype='object')

In [11]:
# Number of dimensions (rows and columns -> 2)
shopping_carts.ndim

2

In [13]:
# (number of rows, number of columns)
shopping_carts.shape

(5, 2)

In [14]:
# Total number of cells (rows * columns), NaN cells included
shopping_carts.size

10

## Selections

In [16]:
# Build a frame from `items`, keeping only the 'camilo' column
camilo_data = pd.DataFrame(data=items, columns=['camilo'])
camilo_data

Unnamed: 0,camilo
bike,1
pants,2
bananas,3


In [17]:
# Build a frame from `items`, keeping only the rows labelled 'book' and 'pants'
sel_shopping_cart = pd.DataFrame(data=items, index=['book', 'pants'])
sel_shopping_cart


Unnamed: 0,camilo,carito
book,,40
pants,2.0,45


In [18]:
# Restrict both axes at once: rows 'book' and 'pants', column 'carito'
carito_sel_shopping_cart = pd.DataFrame(
    data=items,
    index=['book', 'pants'],
    columns=['carito'],
)
carito_sel_shopping_cart

Unnamed: 0,carito
book,40
pants,45


## Another way to create a DataFrame

In [19]:
# A DataFrame can also be built from a list of dictionaries: each dictionary
# is one row and its keys become the column labels.
# Note: this rebinds `items`, which earlier cells used for a dict of Series.
items = [dict(camilo=1, pedro=2, carito=3)]
dat_frame = pd.DataFrame(data=items, index=['number_of_carts'])
dat_frame

Unnamed: 0,camilo,pedro,carito
number_of_carts,1,2,3


## Accessing elements

In [4]:
# Inventory records, one dictionary per store. The first record has no
# 'glasses' key, so that cell is NaN in the resulting frame.
items2 = [
    dict(bikes=20, pants=30, watches=35),
    dict(watches=10, glasses=50, bikes=15, pants=5),
]

# Turn the records into a DataFrame, labelling the rows by store
store_items = pd.DataFrame(data=items2, index=['store 1', 'store 2'])

# Show the resulting table
store_items


Unnamed: 0,bikes,pants,watches,glasses
store 1,20,30,35,
store 2,15,5,10,50.0


In [24]:
# Select the single column 'bikes' (as a one-column DataFrame) for every store
print('How many bikes are in each store:\n', store_items.loc[:, ['bikes']])

How many bikes are in each store:
          bikes
store 1     20
store 2     15


In [25]:
# Select the columns 'bikes' and 'pants' for every store
print('How many bikes and pants are in each store:\n', store_items.loc[:, ['bikes', 'pants']])

How many bikes and pants are in each store:
          bikes  pants
store 1     20     30
store 2     15      5


In [26]:
# Select the row labelled 'store 1' with all of its columns
print('What items are in Store 1:\n', store_items.loc[['store 1'], :])

What items are in Store 1:
          bikes  pants  watches  glasses
store 1     20     30       35      NaN


In [27]:
# Read one cell with a single .loc[row_label, column_label] look-up instead of
# chained indexing (store_items['bikes']['store 2']); the value is the same.
print('How many bikes are in Store 2:', store_items.loc['store 2', 'bikes'])

How many bikes are in Store 2: 15


## Adding new elements

In [28]:
# We add a new column named shirts to our store_items DataFrame indicating the
# number of shirts in stock at each store: 15 shirts in store 1 and 2 shirts in store 2
store_items['shirts'] = [15,2]

# We display the modified DataFrame
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts
store 1,20,30,35,,15
store 2,15,5,10,50.0,2


In [29]:
# New column 'suits': element-wise sum of the 'pants' and 'shirts' columns
store_items['suits'] = store_items['pants'].add(store_items['shirts'])

# Show the updated table
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store 1,20,30,35,,15,45
store 2,15,5,10,50.0,2,7


In [30]:
# A one-element list of dictionaries holding the item counts for the new store
new_items = [dict(bikes=20, pants=30, watches=35, glasses=4)]

# Build a one-row DataFrame from new_items, with the row labelled 'store 3'
new_store = pd.DataFrame(data=new_items, index=['store 3'])

# Show the items at the new store
new_store

Unnamed: 0,bikes,pants,watches,glasses
store 3,20,30,35,4


In [32]:
# Stack new_store underneath the existing rows. store_items is rebound to the
# result, so running this cell a second time would append store 3 again.
store_items = pd.concat(objs=[store_items, new_store])

# Show the updated table
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store 1,20,30,35,,15.0,45.0
store 2,15,5,10,50.0,2.0,7.0
store 3,20,30,35,4.0,,


In [33]:
# Insert a 'shoes' column at position 4, i.e. just before the column that
# currently sits at numerical index 4. This modifies store_items in place.
store_items.insert(loc=4, column='shoes', value=[8, 5, 0])

# Show the updated table
store_items

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits
store 1,20,30,35,,8,15.0,45.0
store 2,15,5,10,50.0,5,2.0,7.0
store 3,20,30,35,4.0,0,,


In [38]:
# New column 'new_watches': twice the value of the 'watches' column
store_items['new_watches'] = store_items['watches'].mul(2)
store_items

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits,new_watches
store 1,20,30,35,,8,15.0,45.0,70
store 2,15,5,10,50.0,5,2.0,7.0,20
store 3,20,30,35,4.0,0,,,70


## Deleting elements

In [39]:
# We remove the new_watches column; pop() deletes it from store_items in place
# (its return value is not used here)
store_items.pop('new_watches')

# We display the modified DataFrame
store_items

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits
store 1,20,30,35,,8,15.0,45.0
store 2,15,5,10,50.0,5,2.0,7.0
store 3,20,30,35,4.0,0,,


In [40]:
# Drop the 'watches' and 'shoes' columns; drop() returns a new frame,
# which is bound back to store_items
store_items = store_items.drop(columns=['watches', 'shoes'])

# Show the updated table
store_items

Unnamed: 0,bikes,pants,glasses,shirts,suits
store 1,20,30,,15.0,45.0
store 2,15,5,50.0,2.0,7.0
store 3,20,30,4.0,,


In [41]:
# Drop the rows labelled 'store 2' and 'store 1'
store_items = store_items.drop(index=['store 2', 'store 1'])

# Show the updated table
store_items


Unnamed: 0,bikes,pants,glasses,shirts,suits
store 3,20,30,4.0,,


## Rename

In [42]:
# Relabel the column 'bikes' as 'hats'
store_items = store_items.rename({'bikes': 'hats'}, axis='columns')

# Show the updated table
store_items

Unnamed: 0,hats,pants,glasses,shirts,suits
store 3,20,30,4.0,,


In [43]:
# Relabel the row 'store 3' as 'last store'
store_items = store_items.rename({'store 3': 'last store'}, axis='index')

# Show the updated table
store_items

Unnamed: 0,hats,pants,glasses,shirts,suits
last store,20,30,4.0,,


In [44]:
# Use the values of the 'pants' column as the row index
store_items = store_items.set_index(keys='pants')

# Show the updated table
store_items


Unnamed: 0_level_0,hats,glasses,shirts,suits
pants,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
30,20,4.0,,


## Dealing with NaN


In [54]:

# Inventory records for three stores. Some keys are missing from some
# records (e.g. 'glasses' for the first store), which yields NaN cells.
items2 = [
    dict(bikes=20, pants=30, watches=35, shirts=15, shoes=8, suits=45),
    dict(watches=10, glasses=50, bikes=15, pants=5, shirts=2, shoes=5, suits=7),
    dict(bikes=20, pants=30, watches=35, glasses=4, shoes=10),
]

# Rebuild store_items from scratch, labelling the rows by store
store_items = pd.DataFrame(
    data=items2,
    index=['store 1', 'store 2', 'store 3'],
)

# Show the resulting table
store_items


Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,,10,,4.0


In [50]:
# We build a Boolean mask with the same shape as store_items (nothing is counted yet)
store_items.isnull() # True where the value is NaN, False otherwise

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,False,False,False,False,False,False,True
store 2,False,False,False,False,False,False,False
store 3,False,False,False,True,False,True,False


In [52]:
# Total NaN count: isna() flags each missing cell, the first sum() counts
# the flags per column, and the second sum() adds the per-column counts
x = (
    store_items
    .isna()
    .sum()
    .sum()
)

# Report the total
print('Number of NaN values in our DataFrame:', x)

Number of NaN values in our DataFrame: 3


### Eliminating NaN Values

In [55]:
# Return a copy without the rows that contain at least one NaN
# (store_items itself is not modified)
store_items.dropna(axis='index')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 2,15,5,10,2.0,5,7.0,50.0


In [56]:
# Return a copy without the columns that contain at least one NaN
store_items.dropna(axis='columns')

Unnamed: 0,bikes,pants,watches,shoes
store 1,20,30,35,8
store 2,15,5,10,5
store 3,20,30,35,10


### Replacing NaN Values

In [58]:
# Return a copy in which every NaN is replaced by 0
store_items.fillna(value=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,0.0
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,0.0,10,0.0,4.0


In [60]:
# Forward fill down each column: a NaN takes the value from the row above.
# A NaN in the first row has nothing above it and stays NaN.
store_items.ffill(axis='index')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,2.0,10,7.0,4.0


In [61]:
# Forward fill along each row: a NaN takes the value from the column to its left
store_items.ffill(axis='columns')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20.0,30.0,35.0,15.0,8.0,45.0,45.0
store 2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store 3,20.0,30.0,35.0,35.0,10.0,10.0,4.0


In [10]:
# Backward fill up each column: a NaN takes the value from the row below
store_items.bfill(axis='index')



'2.1.4'

In [65]:
# Backward fill along each row: a NaN takes the value from the column to its right.
# A NaN in the last column has nothing to its right and stays NaN.
store_items.bfill(axis='columns')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20.0,30.0,35.0,15.0,8.0,45.0,
store 2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store 3,20.0,30.0,35.0,10.0,10.0,4.0,4.0


We can also choose to replace NaN values by using different interpolation methods. For example, the `.interpolate(method='linear', axis=...)` method will use linear interpolation to replace NaN values using the values along the given axis. Let's see some examples:

In [66]:
# Linear interpolation down each column (along the row index)
store_items.interpolate(method='linear', axis='index')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,2.0,10,7.0,4.0


In [67]:
# Linear interpolation along each row (across the columns)
store_items.interpolate(method='linear', axis='columns')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20.0,30.0,35.0,15.0,8.0,45.0,45.0
store 2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store 3,20.0,30.0,35.0,22.5,10.0,7.0,4.0
