In [1]:
import pandas as pd
from scipy.stats import zscore

# Read dataset
df = pd.read_csv("data/Toyota.csv")
df.head(5)

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight
0,0,13500,23.0,46986,Diesel,90,1.0,0,2000,three,1165
1,1,13750,23.0,72937,Diesel,90,1.0,0,2000,3,1165
2,2,13950,24.0,41711,Diesel,90,,0,2000,3,1165
3,3,14950,26.0,48000,Diesel,90,0.0,0,2000,3,1165
4,4,13750,30.0,38500,Diesel,90,0.0,0,2000,3,1170


In [2]:
# a. Remove all missing values
df_cleaned = df.dropna()  # Removes rows with any missing values
df_cleaned  # 🔹 Data after removing missing values

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight
0,0,13500,23.0,46986,Diesel,90,1.0,0,2000,three,1165
1,1,13750,23.0,72937,Diesel,90,1.0,0,2000,3,1165
3,3,14950,26.0,48000,Diesel,90,0.0,0,2000,3,1165
4,4,13750,30.0,38500,Diesel,90,0.0,0,2000,3,1170
5,5,12950,32.0,61000,Diesel,90,0.0,0,2000,3,1170
...,...,...,...,...,...,...,...,...,...,...,...
1425,1425,7950,80.0,??,Petrol,86,1.0,0,1300,4,1000
1429,1429,8950,78.0,24000,Petrol,86,1.0,1,1300,5,1065
1430,1430,8450,80.0,23000,Petrol,86,0.0,0,1300,3,1015
1432,1432,10845,72.0,??,Petrol,86,0.0,0,1300,3,1015


In [3]:
# b. Display datatypes and concise summary of all numeric variables
df_datatypes = df.dtypes  # Display data types of each column
df_numeric_summary = df.describe()  # Summary of numeric variables
df_datatypes, df_numeric_summary  # 🔹 Display datatypes and summary of numeric variables

(Unnamed: 0      int64
 Price           int64
 Age           float64
 KM             object
 FuelType       object
 HP             object
 MetColor      float64
 Automatic       int64
 CC              int64
 Doors          object
 Weight          int64
 dtype: object,
         Unnamed: 0         Price          Age     MetColor    Automatic  \
 count  1436.000000   1436.000000  1336.000000  1286.000000  1436.000000   
 mean    717.500000  10730.824513    55.672156     0.674961     0.055710   
 std     414.681806   3626.964585    18.589804     0.468572     0.229441   
 min       0.000000   4350.000000     1.000000     0.000000     0.000000   
 25%     358.750000   8450.000000    43.000000     0.000000     0.000000   
 50%     717.500000   9900.000000    60.000000     1.000000     0.000000   
 75%    1076.250000  11950.000000    70.000000     1.000000     0.000000   
 max    1435.000000  32500.000000    80.000000     1.000000     1.000000   
 
                 CC      Weight  
 count  143

In [4]:
# c. Remove All duplicate records
df_no_duplicates = df.drop_duplicates()  # Removes duplicate rows
df_no_duplicates  # 🔹 Data after removing duplicates

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight
0,0,13500,23.0,46986,Diesel,90,1.0,0,2000,three,1165
1,1,13750,23.0,72937,Diesel,90,1.0,0,2000,3,1165
2,2,13950,24.0,41711,Diesel,90,,0,2000,3,1165
3,3,14950,26.0,48000,Diesel,90,0.0,0,2000,3,1165
4,4,13750,30.0,38500,Diesel,90,0.0,0,2000,3,1170
...,...,...,...,...,...,...,...,...,...,...,...
1431,1431,7500,,20544,Petrol,86,1.0,0,1300,3,1025
1432,1432,10845,72.0,??,Petrol,86,0.0,0,1300,3,1015
1433,1433,8500,,17016,Petrol,86,0.0,0,1300,3,1015
1434,1434,7250,70.0,??,,86,1.0,0,1300,3,1015


In [5]:
# d. Apply Z-score Normalization on Price Column
df['Price_zscore'] = zscore(df['Price'])  # Apply Z-score normalization
df[['Price', 'Price_zscore']]  # 🔹 Data after applying Z-score normalization

Unnamed: 0,Price,Price_zscore
0,13500,0.763763
1,13750,0.832715
2,13950,0.887877
3,14950,1.163685
4,13750,0.832715
...,...,...
1431,7500,-0.891089
1432,10845,0.031491
1433,8500,-0.615281
1434,7250,-0.960042


In [6]:
# e. Shape and reshape using pivot_table
pivot_table = df.pivot_table(values='Price', index='FuelType', columns='Age', aggfunc='mean')  # Example pivot table
pivot_table  # 🔹 Reshaped data using pivot_table

Age,1.0,2.0,4.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,...,71.0,72.0,73.0,74.0,75.0,76.0,77.0,78.0,79.0,80.0
FuelType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CNG,,,,,,,,,,,...,6450.0,,,,,,,,5250.0,7460.0
Diesel,,,31137.5,,22950.0,24963.333333,,,23000.0,,...,7670.0,,7266.666667,7087.5,7900.0,7275.0,8583.333333,6475.0,7450.0,7825.0
Petrol,18020.0,21125.0,,22500.0,19050.0,20714.285714,18546.666667,18450.0,18650.0,20000.0,...,7949.647059,7783.055556,7949.6,7605.681818,8033.709677,8044.736842,7985.0,7891.891892,8305.0,7801.787234
