# An introduction to Pandas dataframes

## Import the dataset

In [20]:
import pandas as pd
import os

# Current working directory of the kernel (normally the folder the notebook
# was launched from); the dataset is expected in a 'datasets' subfolder of it.
os_path = os.getcwd()
# Build the path with os.path.join so the separator matches the host OS.
# Hard-coded backslashes only work on Windows, and '\d' / '\O' are invalid
# string escapes that newer Python versions warn about.
datapath = os.path.join(os_path, 'datasets', 'Online_Retail.csv')
# Decode the CSV as ISO-8859-1 (Latin-1) rather than the default UTF-8.
sales_data = pd.read_csv(datapath, encoding='ISO-8859-1')
# Preview the first five rows.
print(sales_data.head())

  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

    InvoiceDate  UnitPrice  CustomerID         Country  
0  12/1/10 8:26       2.55     17850.0  United Kingdom  
1  12/1/10 8:26       3.39     17850.0  United Kingdom  
2  12/1/10 8:26       2.75     17850.0  United Kingdom  
3  12/1/10 8:26       3.39     17850.0  United Kingdom  
4  12/1/10 8:26       3.39     17850.0  United Kingdom  


The dataset is imported as a dataframe:

In [21]:
# Inspect the object returned by pd.read_csv to confirm it is a DataFrame
type(sales_data)

pandas.core.frame.DataFrame

## Create a DataFrame from an Array

In [22]:
import numpy as np

# Build a 3x3 integer matrix, one inner list per row
data_arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
# Wrap the array in a DataFrame, giving each of the three columns a label
df_from_array = pd.DataFrame(data=data_arr, columns=list('ABC'))

Notice the difference between a NumPy array and a DataFrame: the array holds only the values, while the DataFrame also carries labels for its columns (here `A`, `B`, `C`) and for its rows (the index `0`, `1`, `2`).

In [23]:
# Display the raw NumPy array: values only, with no row or column labels
data_arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [24]:
# Display the DataFrame built from the array: the same values, now shown
# with the column labels A, B, C and a 0-2 row index
df_from_array

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [25]:
# Print the plain-text representations of both objects, one after the other,
# to compare the unlabeled array with the labeled DataFrame
print(
    f'Array: \n{data_arr}',
    f'\nDataFrame: \n{df_from_array}',
    sep='\n',
)

Array: 
[[1 2 3]
 [4 5 6]
 [7 8 9]]

DataFrame: 
   A  B  C
0  1  2  3
1  4  5  6
2  7  8  9


## Create a DataFrame using lists

In [27]:
# Each inner list is one row: [ID, Name, Age]
data_list = [
    [1, 'Yui', 32],
    [2, 'Ai', 34],
]

# Lists carry no field names, so the column labels are supplied explicitly
df_from_list = pd.DataFrame(data=data_list, columns=['ID', 'Name', 'Age'])
df_from_list

Unnamed: 0,ID,Name,Age
0,1,Yui,32
1,2,Ai,34


## Create a DataFrame using dictionaries

### A list of dictionaries

In [29]:
# Each dictionary is one row; its keys become the column labels
data_dicts = [
    dict(ID=1, Name='Satoshi', Age=29),
    dict(ID=2, Name='Juji', Age=30),
]

df_from_dicts = pd.DataFrame(data=data_dicts)
df_from_dicts

Unnamed: 0,ID,Name,Age
0,1,Satoshi,29
1,2,Juji,30


### A dictionary with lists as items

In [32]:
# Each key is a column label and each list holds that column's values
data_dict = {
    'ID': [1, 2, 3],
    'Name': ['Noel', 'Liam', 'Chris'],
    'Age': [49, 47, 42],
}

df_from_dict = pd.DataFrame(data=data_dict)
df_from_dict

Unnamed: 0,ID,Name,Age
0,1,Noel,49
1,2,Liam,47
2,3,Chris,42


### A dictionary with Pandas Series as items

- A `Series` is a one-dimensional labeled array; each labeled column of a DataFrame is a `Series`.

In [33]:
# Each dictionary value is a pandas Series (not a plain list, as in the
# previous section); the dictionary keys become the DataFrame column labels.
data_dict_series = {
    'ID': pd.Series([1, 2, 3]),
    'Name': pd.Series(['Feynman', 'Einstein', 'Newton']),
    'Age': pd.Series([47, 42, 24]),
}

# All three Series use the default 0..2 index, so they line up row by row
# when combined into a single DataFrame.
df_from_dict_series = pd.DataFrame(data_dict_series)
df_from_dict_series

Unnamed: 0,ID,Name,Age
0,1,Feynman,47
1,2,Einstein,42
2,3,Newton,24
