# Working with Pandas in Python

In [14]:
# importing pandas
import pandas as pd

In [15]:
# Load the tips dataset from a CSV file (relative path, so it is resolved
# against the notebook's working directory).
df = pd.read_csv("tips.csv")
# Excel files can be read in a similar way: df = pd.read_excel("data.xlsx")

## Important Functions of Pandas

In [16]:
# Preview the top of the dataset; n=5 is head()'s default, spelled out here
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [45]:
# Preview the bottom of the dataset; n=5 is tail()'s default, spelled out here
df.tail(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.0,Female,Yes,Sat,Dinner,2
241,22.67,2.0,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2
243,18.78,3.0,Female,No,Thur,Dinner,2


In [46]:
# Print a concise summary of the dataset: index range, column names,
# non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   total_bill  244 non-null    float64
 1   tip         244 non-null    float64
 2   sex         244 non-null    object 
 3   smoker      244 non-null    object 
 4   day         244 non-null    object 
 5   time        244 non-null    object 
 6   size        244 non-null    int64  
dtypes: float64(2), int64(1), object(4)
memory usage: 13.5+ KB


In [47]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of the dataset
df.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


## Intro to DataFrames

In [17]:
# Build a DataFrame from a dict: each key becomes a column name and each
# list supplies that column's values (all lists have the same length).
data = dict(
    Name=["Ali", "Bilal", "Chaudhry"],
    Age=[20, 25, 30],
    City=["Lahore", "Karachi", "Islamabad"],
)

df1 = pd.DataFrame(data)
df1

Unnamed: 0,Name,Age,City
0,Ali,20,Lahore
1,Bilal,25,Karachi
2,Chaudhry,30,Islamabad


In [18]:
# Select a single column. The double brackets pass a list of column names,
# so the result is a one-column DataFrame rather than a Series.
name_df = df1[["Name"]]
name_df

Unnamed: 0,Name
0,Ali
1,Bilal
2,Chaudhry


In [19]:
# The same list-of-names syntax selects several columns at once
df2 = df1[["Name", "City"]]
df2

Unnamed: 0,Name,City
0,Ali,Lahore
1,Bilal,Karachi
2,Chaudhry,Islamabad


## Accessing Rows & Columns from DataFrames by Integer Position (`iloc`)

In [25]:
# iloc is position-based: row position 0, column position 0 (1st row, 1st column)
df1.iloc[0, 0]

'Ali'

In [26]:
# 3rd row, 3rd column
df1.iloc[2, 2]

'Islamabad'

In [27]:
# 2nd row, 3rd column
df1.iloc[1, 2]

'Karachi'

In [28]:
# 2nd row, 2nd column
df1.iloc[1, 1]

25

## Accessing Rows & Columns from DataFrames by Label (`loc`)

In [29]:
# loc is label-based: row labelled 0, column labelled "Name"
df1.loc[0, "Name"]

'Ali'

In [30]:
# Row labelled 1 (the second row), column labelled "Age"
df1.loc[1, "Age"]

25

## DataFrame Slicing

In [39]:
# Positional slicing with iloc (the end position is exclusive):
# rows 0-1 and columns 0-2, i.e. the first 2 rows and first 3 columns.
a = df1.iloc[:2, :3]
a

Unnamed: 0,Name,Age,City
0,Ali,20,Lahore
1,Bilal,25,Karachi


In [40]:
# first 2 rows (positions 0-1) and last 2 columns (positions 1-2)
# NOTE(review): for the last 2 rows as well, the slice would be df1.iloc[-2:, -2:]
b = df1.iloc[0:2, 1:3]
b

Unnamed: 0,Age,City
0,20,Lahore
1,25,Karachi


## Unique Values, Filtering and Saving data

In [54]:
# Release years containing repeats, used below to demonstrate unique()
data = {
    "Released": [
        1980, 1985, 1980, 1990, 1985,
        1987, 1987, 1999, 1993, 1997,
    ],
}
df3 = pd.DataFrame(data=data)

In [55]:
# Distinct release years, in order of first appearance
print(df3["Released"].unique())

[1980 1985 1990 1987 1999 1993 1997]


In [66]:
# Distinct values of the "day" column in the tips dataset
print(df["day"].unique())

['Sun' 'Sat' 'Thur' 'Fri']


In [59]:
# Filter rows by release year: build a boolean mask that is True where
# the year is greater than 1990, then keep only the rows where it is True.
d = df3["Released"] > 1990
e = df3[d]
e

Unnamed: 0,Released
7,1999
8,1993
9,1997


In [61]:
# Bills with a tip greater than 5.00: boolean mask on the "tip" column,
# then select the matching rows of the tips dataset.
f = df['tip'] > 5.00
g = df[f]
g

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
23,39.42,7.58,Male,No,Sat,Dinner,4
44,30.4,5.6,Male,No,Sun,Dinner,4
47,32.4,6.0,Male,No,Sun,Dinner,4
52,34.81,5.2,Female,No,Sun,Dinner,4
59,48.27,6.73,Male,No,Sat,Dinner,4
85,34.83,5.17,Female,No,Thur,Lunch,4
88,24.71,5.85,Male,No,Thur,Lunch,2
116,29.93,5.07,Male,No,Sun,Dinner,4
141,34.3,6.7,Male,No,Thur,Lunch,6
155,29.85,5.14,Female,No,Sun,Dinner,5


## Example: Filtering Rows and Saving to CSV

In [68]:
# Build a small sample DataFrame of students and their birth years.
details = {
    "Student": ["Ali", "Bilal", "Chaudhry", "Daniyal", "Emaan"],
    "Born": [2003, 2004, 1999, 2000, 1998]
}
dataframe = pd.DataFrame(details)

# Keep only students born strictly after 2000 (2000 itself is excluded).
dataframe_filtered = dataframe[dataframe["Born"] > 2000]

# Save the filtered rows to CSV; index=False leaves the row index out of the file.
dataframe_filtered.to_csv("born_after_2000.csv", index=False)

# Print the label and the frame on separate lines using sep="\n".
# Passing "Filtered Data:\n" as the first argument made print() insert its
# default space separator in front of the frame, which shifted the header
# row one column to the right of the data rows.
print("Filtered Data:", dataframe_filtered, sep="\n")

Filtered Data:
   Student  Born
0     Ali  2003
1   Bilal  2004


In [3]:
# Read the CSV file with the same name the example above saved to.
# NOTE(review): pandas is already imported in the first code cell; this
# repeated import presumably exists so the cell can run on a fresh kernel
# (the execution count restarts here) — confirm whether it is still needed.
import pandas as pd
born_after_2000 = pd.read_csv("born_after_2000.csv")

In [4]:
# Preview the reloaded data (head() shows at most the first 5 rows)
born_after_2000.head()

Unnamed: 0,Student,Born
0,Ali,2003
1,Bilal,2004
