In [1]:
import pandas as pd

#### from list of lists

In [3]:
# Row-oriented input: every inner list is one row, [product name, price].
data = [
    ['Almond Candy', 2.0],
    ['Chocolate Candy', 5.0],
    ['Coffee Candy', 4.0],
]
# Column names are supplied separately because plain lists carry no labels.
df = pd.DataFrame(data=data, columns=['Product', 'Price'])
# A bare expression on the last line makes the notebook render the frame.
df

Unnamed: 0,Product,Price
0,Almond Candy,2.0
1,Chocolate Candy,5.0
2,Coffee Candy,4.0


#### from dictionary

In [6]:
# Column-oriented input: each key is a column name, each value the column data.
data = {
    'Product': ['Kit Kat', 'Snickers', 'Wonka Bar'],
    'Price': [5.0, 5.0, 6.0],
}
# Explicit row labels; a pandas index may contain duplicate labels.
row_labels = ["Nestlé SA", "Mars Inc", "Nestlé SA"]
df = pd.DataFrame(data, index=row_labels)
df

Unnamed: 0,Product,Price
Nestlé SA,Kit Kat,5.0
Mars Inc,Snickers,5.0
Nestlé SA,Wonka Bar,6.0


The same DataFrame can also be built with `DataFrame.from_dict` (shown here without a custom index, so the rows get the default integer index).

In [24]:
# Same column-oriented dict as before: keys are column names.
data = {
    'Product': ['Kit Kat', 'Snickers', 'Wonka Bar'],
    'Price': [5.0, 5.0, 6.0],
}
# orient='columns' is the default; spelled out to show that keys map to columns.
df = pd.DataFrame.from_dict(data, orient='columns')
df

Unnamed: 0,Product,Price
0,Kit Kat,5.0
1,Snickers,5.0
2,Wonka Bar,6.0


#### from lists

In [10]:
# Two parallel lists: the i-th product name pairs with the i-th price.
Product = ['Kit Kat', 'Snickers', 'Wonka Bar']
Price = [5.0, 5.0, 6.0]
# Pair the lists element-wise into one (product, price) tuple per row.
tuples = [(name, price) for name, price in zip(Product, Price)]
df = pd.DataFrame(data=tuples, columns=['Product', 'Price'])
df

Unnamed: 0,Product,Price
0,Kit Kat,5.0
1,Snickers,5.0
2,Wonka Bar,6.0


The same list of tuples can also be loaded into a DataFrame with `DataFrame.from_records`.

In [26]:
# `tuples` holds one (product, price) tuple per row; from_records turns each
# tuple into a DataFrame row and labels the fields with `columns`.
df = pd.DataFrame.from_records(data=tuples, columns=['Product', 'Price'])
df

Unnamed: 0,Product,Price
0,Kit Kat,5.0
1,Snickers,5.0
2,Wonka Bar,6.0


#### from tuples of columns

In [35]:
# Column-oriented input: a list of (column name, column values) pairs.
data = [
    ('Product', ['Almond Candy', 'Chocolate Candy', 'Coffee Candy']),
    ('Price', [5.0, 5.0, 6.0]),
]
# DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0, so it
# raises AttributeError on current pandas. Converting the pairs to a dict
# (insertion-ordered, so the column order is preserved) and passing it to
# from_dict builds the same frame.
# The result is assigned to `df`, like every other cell, before displaying it.
df = pd.DataFrame.from_dict(dict(data))
df

  


Unnamed: 0,Product,Price
0,Almond Candy,5.0
1,Chocolate Candy,5.0
2,Coffee Candy,6.0


#### from list of dictionaries

In [12]:
# Row-oriented input: one dict per row, keyed by column name.
data = [
    {'Product': 'Kit Kat', 'Price': 5.0},
    {'Product': "Snickers", 'Price': 5.0},
    {'Product': 'Wonka Bar', 'Price': 6.0},
]
# Column labels are taken from the dict keys, so no `columns` argument is needed.
df = pd.DataFrame(data=data)
df

Unnamed: 0,Price,Product
0,5.0,Kit Kat
1,5.0,Snickers
2,6.0,Wonka Bar


#### from dictionary of series

In [7]:
# Both Series share the same row labels, so they line up one-to-one.
labels = ['a', 'b', 'c', 'd']
data = {
    'col1': pd.Series(["Red", "Black", "White", "Pink"], index=labels),
    'col2': pd.Series([33, 44, 55, 66], index=labels),
}
# Each dict key becomes a column; the Series index becomes the row index.
df = pd.DataFrame(data)
df

Unnamed: 0,col1,col2
a,Red,33
b,Black,44
c,White,55
d,Pink,66


#### from csv file

In [16]:
# Load the CSV with no index column specified, so rows get the default
# integer index, then preview the first five rows.
df = pd.read_csv(filepath_or_buffer="airline_passengars.csv")
df.head(n=5)

Unnamed: 0,Month,Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


Use `index_col` to set one of the columns as the index.

In [17]:
# Re-read the file, this time using the "Month" column as the row index.
df = pd.read_csv(
    "airline_passengars.csv",
    index_col="Month",
)
df.head(n=5)

Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01,112
1949-02,118
1949-03,132
1949-04,129
1949-05,121


In [18]:
# Inspect the row index built from the "Month" column.
df.index

Index(['1949-01', '1949-02', '1949-03', '1949-04', '1949-05', '1949-06',
       '1949-07', '1949-08', '1949-09', '1949-10',
       ...
       '1960-03', '1960-04', '1960-05', '1960-06', '1960-07', '1960-08',
       '1960-09', '1960-10', '1960-11', '1960-12'],
      dtype='object', name='Month', length=144)

When working with time series data, we usually want the index to be a `DatetimeIndex`. Passing `parse_dates=True` together with `index_col` makes `read_csv` parse the index column as dates:

In [21]:
# parse_dates=True asks read_csv to parse the index column ("Month") as dates.
df = pd.read_csv(
    "airline_passengars.csv",
    index_col="Month",
    parse_dates=True,
)
df.head(n=5)


Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121


In [22]:
# Inspect the row index again after re-reading the file with parse_dates=True.
df.index

DatetimeIndex(['1949-01-01', '1949-02-01', '1949-03-01', '1949-04-01',
               '1949-05-01', '1949-06-01', '1949-07-01', '1949-08-01',
               '1949-09-01', '1949-10-01',
               ...
               '1960-03-01', '1960-04-01', '1960-05-01', '1960-06-01',
               '1960-07-01', '1960-08-01', '1960-09-01', '1960-10-01',
               '1960-11-01', '1960-12-01'],
              dtype='datetime64[ns]', name='Month', length=144, freq=None)