In [2]:
import pandas as pd

df = pd.read_csv('data/Toyota.csv')
df.head(5)

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight
0,0,13500,23.0,46986,Diesel,90,1.0,0,2000,three,1165
1,1,13750,23.0,72937,Diesel,90,1.0,0,2000,3,1165
2,2,13950,24.0,41711,Diesel,90,,0,2000,3,1165
3,3,14950,26.0,48000,Diesel,90,0.0,0,2000,3,1165
4,4,13750,30.0,38500,Diesel,90,0.0,0,2000,3,1170


In [4]:
# Convert columns to numeric if they are not already
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
df['KM'] = pd.to_numeric(df['KM'], errors='coerce')
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')

In [5]:
# a. Create data subsets (e.g., cars with Price > 10000 and KM < 50000)
a = df[(df['Price'] > 10000) & (df['KM'] < 50000)]
a

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight
0,0,13500,23.0,46986.0,Diesel,90,1.0,0,2000,three,1165
2,2,13950,24.0,41711.0,Diesel,90,,0,2000,3,1165
3,3,14950,26.0,48000.0,Diesel,90,0.0,0,2000,3,1165
4,4,13750,30.0,38500.0,Diesel,90,0.0,0,2000,3,1170
8,8,21500,27.0,19700.0,Petrol,192,0.0,0,1800,3,1185
...,...,...,...,...,...,...,...,...,...,...,...
1037,1037,10500,58.0,25000.0,Petrol,110,1.0,0,1600,5,1075
1039,1039,10950,59.0,22705.0,,110,1.0,0,1600,5,1075
1041,1041,10900,62.0,17345.0,,110,1.0,0,1600,3,1050
1042,1042,10950,67.0,15535.0,Petrol,86,,1,1300,4,1030


In [7]:
# b. Merge Data (mock merge with a small lookup table on 'FuelType')
fuel_info = pd.DataFrame({
    'FuelType': df['FuelType'].head(),
    'Region': ['North', 'South', 'East', 'West', 'Central']
})
b = pd.merge(df, fuel_info, on='FuelType', how='left')
b

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight,Region
0,0,13500,23.0,46986.0,Diesel,90,1.0,0,2000,three,1165,North
1,0,13500,23.0,46986.0,Diesel,90,1.0,0,2000,three,1165,South
2,0,13500,23.0,46986.0,Diesel,90,1.0,0,2000,three,1165,East
3,0,13500,23.0,46986.0,Diesel,90,1.0,0,2000,three,1165,West
4,0,13500,23.0,46986.0,Diesel,90,1.0,0,2000,three,1165,Central
...,...,...,...,...,...,...,...,...,...,...,...,...
2007,1431,7500,,20544.0,Petrol,86,1.0,0,1300,3,1025,
2008,1432,10845,72.0,,Petrol,86,0.0,0,1300,3,1015,
2009,1433,8500,,17016.0,Petrol,86,0.0,0,1300,3,1015,
2010,1434,7250,70.0,,,86,1.0,0,1300,3,1015,


In [8]:
# c. Sort Data by 'Age' in descending order
c = df.sort_values(by='Age', ascending=False)
c

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight
1051,1051,6150,80.0,194765.0,Diesel,72,0.0,0,2000,3,1120
1394,1394,7450,80.0,49580.0,,110,1.0,0,1600,4,1035
1385,1385,8250,80.0,52700.0,Petrol,110,0.0,0,1600,3,1050
1062,1062,6750,80.0,160000.0,Petrol,86,0.0,0,1300,3,1015
1369,1369,8250,80.0,57787.0,Petrol,86,0.0,0,1300,3,1015
...,...,...,...,...,...,...,...,...,...,...,...
1416,1416,8950,,40093.0,Petrol,110,0.0,0,1600,5,1114
1422,1422,7600,,36000.0,,110,1.0,0,1600,3,1050
1427,1427,8950,,29000.0,Petrol,86,1.0,1,1300,3,1045
1431,1431,7500,,20544.0,Petrol,86,1.0,0,1300,3,1025


In [9]:
# d. Transposing Data (summary stats)
d = df.describe().transpose()
d

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Unnamed: 0,1436.0,717.5,414.681806,0.0,358.75,717.5,1076.25,1435.0
Price,1436.0,10730.824513,3626.964585,4350.0,8450.0,9900.0,11950.0,32500.0
Age,1336.0,55.672156,18.589804,1.0,43.0,60.0,70.0,80.0
KM,1421.0,68647.239972,37333.023589,1.0,43210.0,63634.0,87000.0,243000.0
MetColor,1286.0,0.674961,0.468572,0.0,0.0,1.0,1.0,1.0
Automatic,1436.0,0.05571,0.229441,0.0,0.0,0.0,0.0,1.0
CC,1436.0,1566.827994,187.182436,1300.0,1400.0,1600.0,1600.0,2000.0
Weight,1436.0,1072.45961,52.64112,1000.0,1040.0,1070.0,1085.0,1615.0


In [10]:

# e. Shape and reshape Data (pivot: average Price by FuelType and Automatic)
e = df.pivot_table(index='FuelType', columns='Automatic', values='Price', aggfunc='mean')
e

Automatic,0,1
FuelType,Unnamed: 1_level_1,Unnamed: 2_level_1
CNG,9584.0,
Diesel,10889.277778,
Petrol,10650.113225,11292.054795
