In [90]:
import numpy as np
import pandas as pd

data = np.array([['','Col1','Col2'],
                ['Row1',1,2],
                ['Row2',3,4]])
                
print(pd.DataFrame(data=data[1:,1:], 
                  index=data[1:,0],
                  columns=data[0,1:]))
#indices start at 0
#number to the left of colon indicates the starting index, and the right indicates ending index

     Col1 Col2
Row1    1    2
Row2    3    4


In [91]:
# Take a 2D array as input to your DataFrame 
my_2darray = np.array([[1, 2, 3], [4, 5, 6]])
print(pd.DataFrame(my_2darray))
print("==============")

# Take a dictionary as input to your DataFrame 
my_dict = {1: ['1', '3'], 2: ['1', '2'], 3: ['2', '4']}
print(pd.DataFrame(my_dict))
print("==============")
# Take a DataFrame as input to your DataFrame 
my_df = pd.DataFrame(data=[4,5,6,7], index=range(0,4), columns=['A'])
print(pd.DataFrame(my_df))
print("==============")
# Take a Series as input to your DataFrame
my_series = pd.Series({"United Kingdom":"London", "India":"New Delhi", "United States":"Washington", 
                       "Belgium":"Brussels"})
print(pd.DataFrame(my_series))

   0  1  2
0  1  2  3
1  4  5  6
   1  2  3
0  1  1  2
1  3  2  4
   A
0  4
1  5
2  6
3  7
                         0
United Kingdom      London
India            New Delhi
United States   Washington
Belgium           Brussels


In [92]:
df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]))
print(df)
print("=============")
#shape shows the dimension of a dataframe - height and width
print(df.shape)
print("=============")

#len()gives the height of the dataframe
print(len(df.index))

list(df.columns.values)

   0  1  2
0  1  2  3
1  4  5  6
(2, 3)
2


[0, 1, 2]

In [93]:
# Check out your DataFrame `df`
print(df)
print("=============")

# Define the new names of your columns
newcols = {
    '0': 'A', 
    '1': 'B', 
    '2': 'C'
}

# Use `rename()` to rename your columns
df.rename(columns=newcols, inplace=True)

# Rename your index
df.rename(index={1: 'a'}, inplace=True)
print(df)

   0  1  2
0  1  2  3
1  4  5  6
   0  1  2
0  1  2  3
a  4  5  6


In [94]:
# Use `reset_index()` to reset the values
df.reset_index(level=0, drop=True)

#resetting index to start from 1
df.index = np.arange(1, len(df) + 1)
print(df)

   0  1  2
1  1  2  3
2  4  5  6


In [95]:
# This will make an index labeled `3` and add the new values. Loc looks at the lable of the index. 
df.loc[3] = [11, 12, 13]
print(df)
print("=============")

# iloc looks at the position of the index. In this case, "2" refers to the index with position "2".
df.iloc[2] = [60, 50, 40]
print(df)


    0   1   2
1   1   2   3
2   4   5   6
3  11  12  13
    0   1   2
1   1   2   3
2   4   5   6
3  60  50  40


In [96]:
# Using `iloc[]` to select an element
print(df.iloc[0][0])
print("================")

# Use `iloc[]` to select a row
print(df.iloc[0])
print("================")

# Use `loc[]` to select a column
print(df.loc[:,0])

1
0    1
1    2
2    3
Name: 1, dtype: int64
1     1
2     4
3    60
Name: 0, dtype: int64


In [97]:
print(df)
print("=============")

# Append a column to `df`
df.loc[:, 3] = pd.Series([10, 11,12], index=df.index)

# Print out `df` again to see the changes
print(df)

    0   1   2
1   1   2   3
2   4   5   6
3  60  50  40
    0   1   2   3
1   1   2   3  10
2   4   5   6  11
3  60  50  40  12


In [98]:
# Drop the column at position 1
df.drop(df.columns[[1]], axis=1,inplace=True) 
#The axis argument is either 0 when it indicates rows and 1 when it is used to drop columns
print(df)

    0   2   3
1   1   3  10
2   4   6  11
3  60  40  12


In [99]:
example_df = pd.DataFrame({"Student 1": ['Ok', 'Awful','Acceptable'], 'Student 2': ['Perfect', 'Awful','Ok'],
              'Student 3': ['Acceptable', 'Perfect','Poor']})
print(example_df)
print("====================")
# Replace the strings by numerical values (0-4)
example_df.replace(['Awful', 'Poor', 'Ok', 'Acceptable', 'Perfect'], [0, 1, 2, 3, 4],inplace = True) 
print(example_df)

    Student 1 Student 2   Student 3
0          Ok   Perfect  Acceptable
1       Awful     Awful     Perfect
2  Acceptable        Ok        Poor
   Student 1  Student 2  Student 3
0          2          4          3
1          0          0          4
2          3          2          1


In [100]:
print(df)
print('==========')
doubler = lambda x: x*2
#Apply the `doubler` function to the index with position 1
df_doubler = df.loc[2].map(doubler)
print(df_doubler)

    0   2   3
1   1   3  10
2   4   6  11
3  60  40  12
0     8
2    12
3    22
Name: 2, dtype: int64


In [101]:
def doubler(x):
    if x % 2 == 0:
        return x
    else:
        return x * 2

# Use `applymap()` to apply `doubler()` to your DataFrame
doubled_df = df.applymap(doubler)

# Check the DataFrame
print(doubled_df)

    0   2   3
1   2   6  10
2   4   6  22
3  60  40  12


In [102]:
empty_df = pd.DataFrame(np.nan, index=[0,1,2,3], columns=['A'])
print(empty_df)

    A
0 NaN
1 NaN
2 NaN
3 NaN


In [103]:
# Create your DataFrame
products = pd.DataFrame({'category': ['Cleaning', 'Cleaning', 'Entertainment', 'Entertainment', 'Tech', 'Tech'],
        'store': ['Walmart', 'Dia', 'Walmart', 'Fnac', 'Dia','Walmart'],
        'price':[11.42, 23.50, 19.99, 15.95, 55.75, 111.55],
        'testscore': [4, 3, 5, 7, 5, 8]})
        
print(products)
print("========")

# Use `pivot()` to pivot the DataFrame
pivot_products = products.pivot(index='category', columns='store', values='price')
#values allows you to specify which values of your original DataFrame you want to see in your pivot table.
#columns: whatever you pass to this argument will become a column in your resulting table.
#index: whatever you pass to this argument will become an index in your resulting table.

print(pivot_products)

        category    store   price  testscore
0       Cleaning  Walmart   11.42          4
1       Cleaning      Dia   23.50          3
2  Entertainment  Walmart   19.99          5
3  Entertainment     Fnac   15.95          7
4           Tech      Dia   55.75          5
5           Tech  Walmart  111.55          8
store            Dia   Fnac  Walmart
category                            
Cleaning       23.50    NaN    11.42
Entertainment    NaN  15.95    19.99
Tech           55.75    NaN   111.55


In [104]:
products2 = pd.DataFrame({'category': ['Cleaning', 'Cleaning', 'Entertainment', 'Entertainment', 'Tech', 'Tech'],
                        'store': ['Walmart', 'Dia', 'Walmart', 'Fnac', 'Dia','Walmart'],
                        'price':[11.42, 23.50, 19.99, 15.95, 19.99, 111.55],
                        'testscore': [4, 3, 5, 7, 5, 8]})
print(products2)
print("=================")
# Pivot your `products` DataFrame with `pivot_table()`
pivot_products2 = products2.pivot_table(index='category', columns='store', values='price', aggfunc='mean')

# Check out the results
print(pivot_products2)

        category    store   price  testscore
0       Cleaning  Walmart   11.42          4
1       Cleaning      Dia   23.50          3
2  Entertainment  Walmart   19.99          5
3  Entertainment     Fnac   15.95          7
4           Tech      Dia   19.99          5
5           Tech  Walmart  111.55          8
store            Dia   Fnac  Walmart
category                            
Cleaning       23.50    NaN    11.42
Entertainment    NaN  15.95    19.99
Tech           19.99    NaN   111.55


In [105]:
# The `people` DataFrame
people = pd.DataFrame({'FirstName' : ['John', 'Jane'],
                       'LastName' : ['Doe', 'Austen'],
                       'BloodType' : ['A-', 'B+'],
                       'Weight' : [90, 64]})
print(people)
print("================")
# Use `melt()` on the `people` DataFrame
print(pd.melt(people, id_vars=['FirstName', 'LastName'], var_name='measurements'))

  FirstName LastName BloodType  Weight
0      John      Doe        A-      90
1      Jane   Austen        B+      64
  FirstName LastName measurements value
0      John      Doe    BloodType    A-
1      Jane   Austen    BloodType    B+
2      John      Doe       Weight    90
3      Jane   Austen       Weight    64


In [107]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), columns=['A', 'B', 'C'])
print(df)
print('=========')
for index, row in df.iterrows(): #iterate over the DataFrame
    print(row['A'], row['B'])

   A  B  C
0  1  2  3
1  4  5  6
2  7  8  9
1 2
4 5
7 8
