In [1]:
import pandas as pd
import numpy as np
import warnings
# NOTE(review): this silences every warning for the whole notebook, including
# deprecation/future warnings emitted by pandas and numpy. Consider narrowing it
# to a specific category or module so that API changes are not hidden.
warnings.filterwarnings('ignore')

In [2]:
num_tasks = [10, 12, 13, 14, 15, 16, 17]

# A Series built from a plain list gets the default 0..n-1 integer index
# (an array can be passed in exactly the same way).
series_list = pd.Series(data=num_tasks)
print(f"Number of tasks per day:\n{series_list}")

# Supplying `index=` replaces the default integer labels with custom ones.
tasks_list_labeled = pd.Series(
    data=num_tasks,
    index=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
)
print(f"Days Labeled:\n{tasks_list_labeled}")

Number of tasks per day:
0    10
1    12
2    13
3    14
4    15
5    16
6    17
dtype: int64
Days Labeled:
Mon    10
Tue    12
Wed    13
Thu    14
Fri    15
Sat    16
Sun    17
dtype: int64


In [3]:
# Student names and their letter grades, in matching order.
# (Plain Python lists would work here just as well as Series.)
student = pd.Series(data=['Mary', 'Peter', 'Hulk', 'Toby', 'Thanos'])
grades = pd.Series(data=['B-', 'A+', 'A-', 'B+', 'C'])

# Each keyword becomes a column name; its value supplies that column's data.
student_df = pd.DataFrame(dict(Student=student, Grade=grades))
student_df

Unnamed: 0,Student,Grade
0,Mary,B-
1,Peter,A+
2,Hulk,A-
3,Toby,B+
4,Thanos,C


In [4]:
# 5x2 table of standard-normal samples.
# A seeded Generator is used so that the values are identical on every
# Restart & Run All instead of changing with each execution.
rng = np.random.default_rng(42)
rand_df = pd.DataFrame(rng.standard_normal((5, 2)), columns=['Trial 1', 'Trial 2'])
rand_df

Unnamed: 0,Trial 1,Trial 2
0,-1.684392,-1.744922
1,-1.25259,0.077965
2,-0.032092,-0.39637
3,0.331693,-0.386385
4,-0.624009,-1.062581


In [5]:
# Car makers and their revenue (in billions), listed in matching order.
cars = [
    'Volkswagen',
    'Toyota',
    'Ford',
    'General Motors',
    'Mercedes-Benz',
]
revenue = [
    335.04,
    296.61,
    174.22,
    171.97,
    165.68,
]

# Build a Series from the two lists: revenues are the values, makers are the index labels.
car_ma = pd.Series(data=revenue, index=cars)
car_ma

Volkswagen        335.04
Toyota            296.61
Ford              174.22
General Motors    171.97
Mercedes-Benz     165.68
dtype: float64

In [6]:
# Positional access on a label-indexed Series, written explicitly with .iloc.
# Integer keys passed to plain [] on a Series whose index holds strings only work
# through a label-to-position fallback that recent pandas releases deprecate
# (the keys are to be treated as labels), so .iloc states the intent and keeps working.
print(f"1st element: {car_ma.iloc[0]}\n")
print(f"First 3 elements:\n{car_ma.iloc[:3]}\n")
print(f"Last 2 elements:\n{car_ma.iloc[-2:]}\n")
print(f"2nd 3rd 4th elements:\n{car_ma.iloc[[1, 2, 3]]}")

1st element: 335.04

First 3 elements:
Volkswagen    335.04
Toyota        296.61
Ford          174.22
dtype: float64

Last 2 elements:
General Motors    171.97
Mercedes-Benz     165.68
dtype: float64

2nd 3rd 4th elements:
Toyota            296.61
Ford              174.22
General Motors    171.97
dtype: float64


In [7]:
# Label-based access, spelled out with .loc.
# A label slice includes its end label, so ':General Motors' yields four rows.
print(f"Toyota revenue: {car_ma.loc['Toyota']}\n")
print(f"First 4 elements:\n{car_ma.loc[:'General Motors']}\n")
print(f"Mercedes, Ford, Volkswagen revenue:\n{car_ma.loc[['Mercedes-Benz', 'Ford', 'Volkswagen']]}")

Toyota revenue: 296.61

First 4 elements:
Volkswagen        335.04
Toyota            296.61
Ford              174.22
General Motors    171.97
dtype: float64

Mercedes, Ford, Volkswagen revenue:
Mercedes-Benz    165.68
Ford             174.22
Volkswagen       335.04
dtype: float64


In [8]:
# Small sales table: one row per customer, one dict key per column.
sales_data = pd.DataFrame(
    {
        'CustomerID': ['ID_001', 'ID_002', 'ID_003', 'ID_004', 'ID_005'],
        'location': ['Berlin', 'London', 'Nice', 'New York', 'Vienna'],
        'gender': ['M', 'M', 'F', 'M', 'F'],
        'type': ['Electronics', 'Food&Beverages', 'Food&Beverages', 'Medicine', 'Beauty'],
        'quantity': [1, 3, 4, 2, 1],
        'bill': [670, 75, 125, 250, 80],
    }
)
sales_data

Unnamed: 0,CustomerID,location,gender,type,quantity,bill
0,ID_001,Berlin,M,Electronics,1,670
1,ID_002,London,M,Food&Beverages,3,75
2,ID_003,Nice,F,Food&Beverages,4,125
3,ID_004,New York,M,Medicine,2,250
4,ID_005,Vienna,F,Beauty,1,80


In [9]:
# Select the 'location' column by its label; the result is a Series.
# Attribute access (sales_data.location) is a shortcut for the same thing, but it
# cannot be used for column names containing spaces, e.g. "location ID".
sales_data.loc[:, 'location']

0      Berlin
1      London
2        Nice
3    New York
4      Vienna
Name: location, dtype: object

In [10]:
# A negative step walks the rows from bottom to top;
# a step of -2 keeps every second row, starting with the last one.
sales_data.iloc[::-2]

Unnamed: 0,CustomerID,location,gender,type,quantity,bill
4,ID_005,Vienna,F,Beauty,1,80
2,ID_003,Nice,F,Food&Beverages,4,125
0,ID_001,Berlin,M,Electronics,1,670


In [11]:
# .loc selects by index label, not by position: this returns the row labelled 1.
# With the default index (labels start at 0) that is the second row of the frame.
sales_data.loc[1, :]

CustomerID            ID_002
location              London
gender                     M
type          Food&Beverages
quantity                   3
bill                      75
Name: 1, dtype: object

In [12]:
# Label-based selection of a sub-table: rows labelled 1 and 4,
# restricted to the 'location' and 'type' columns.
sales_data.loc[
    [1, 4],                 # row labels
    ['location', 'type'],   # column labels
]

Unnamed: 0,location,type
1,London,Food&Beverages
4,Vienna,Beauty


In [13]:
# The same sub-table selected by position with .iloc:
# column positions 1 and 3 stand in for the 'location' and 'type' column names.
sales_data.iloc[
    [1, 4],   # row positions
    [1, 3],   # column positions
]

Unnamed: 0,location,type
1,London,Food&Beverages
4,Vienna,Beauty


In [14]:
# Show the 'type' cell of the row labelled 4, overwrite it in place through a
# label-based .loc assignment, then show the same cell again to confirm the change.
print(sales_data.loc[[4],['type']])
sales_data.loc[4,'type'] = 'Electronics'
print(sales_data.loc[[4],['type']])

     type
4  Beauty
          type
4  Electronics


In [15]:
# Position-based assignment with .iloc: row position 4, column position 3.
# At this point column 3 is 'type', so this writes 'Beauty' into that cell.
# NOTE(review): the column position is hard-coded and depends on the column order.
sales_data.iloc[4,3] = 'Beauty'
sales_data

Unnamed: 0,CustomerID,location,gender,type,quantity,bill
0,ID_001,Berlin,M,Electronics,1,670
1,ID_002,London,M,Food&Beverages,3,75
2,ID_003,Nice,F,Food&Beverages,4,125
3,ID_004,New York,M,Medicine,2,250
4,ID_005,Vienna,F,Beauty,1,80


In [16]:
# Boolean mask: True for every row whose quantity is greater than 1.
# Nothing is filtered yet; the mask is only displayed.
sales_data['quantity'].gt(1)

0    False
1     True
2     True
3     True
4    False
Name: quantity, dtype: bool

In [17]:
# Keep only the rows whose quantity is greater than 1 by indexing with the boolean mask.
sales_data[sales_data['quantity'] > 1]

Unnamed: 0,CustomerID,location,gender,type,quantity,bill
1,ID_002,London,M,Food&Beverages,3,75
2,ID_003,Nice,F,Food&Beverages,4,125
3,ID_004,New York,M,Medicine,2,250


In [18]:
# Add a new 'rating' column to sales_data: the rating (out of 5) given by each customer.
# The list is assigned row by row, so it must contain one value per row of the frame.
sales_data['rating'] = [2,5,3,4,4]
sales_data

Unnamed: 0,CustomerID,location,gender,type,quantity,bill,rating
0,ID_001,Berlin,M,Electronics,1,670,2
1,ID_002,London,M,Food&Beverages,3,75,5
2,ID_003,Nice,F,Food&Beverages,4,125,3
3,ID_004,New York,M,Medicine,2,250,4
4,ID_005,Vienna,F,Beauty,1,80,4


In [19]:
# Remove the 'CustomerID' column.
# columns=... is the keyword form of passing the label together with axis=1.
# inplace=True alters the existing DataFrame sales_data directly instead of returning a copy.
sales_data.drop(columns='CustomerID', inplace=True)

In [20]:
# Remove the row labelled 1, again modifying sales_data in place.
# index=... is the keyword form of the default axis=0 (row) behaviour.
sales_data.drop(index=1, inplace=True)

In [21]:
# Renumber the rows 0..n-1. With drop=False (the default) the old labels are kept
# in a new 'index' column. The result is only displayed here; sales_data is not modified.
sales_data.reset_index(drop=False)

Unnamed: 0,index,location,gender,type,quantity,bill,rating
0,0,Berlin,M,Electronics,1,670,2
1,2,Nice,F,Food&Beverages,4,125,3
2,3,New York,M,Medicine,2,250,4
3,4,Vienna,F,Beauty,1,80,4


In [22]:
# Renumber the rows for good:
# drop=True discards the old labels instead of keeping them as an 'index' column;
# inplace=True applies the change to sales_data itself rather than returning a new frame.
sales_data.reset_index(drop=True,inplace=True)
sales_data

Unnamed: 0,location,gender,type,quantity,bill,rating
0,Berlin,M,Electronics,1,670,2
1,Nice,F,Food&Beverages,4,125,3
2,New York,M,Medicine,2,250,4
3,Vienna,F,Beauty,1,80,4
