In [1]:
import pandas as pd
from scipy import stats

# Read the dataset
df = pd.read_csv("data/Toyota.csv")
df.head(5)

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight
0,0,13500,23.0,46986,Diesel,90,1.0,0,2000,three,1165
1,1,13750,23.0,72937,Diesel,90,1.0,0,2000,3,1165
2,2,13950,24.0,41711,Diesel,90,,0,2000,3,1165
3,3,14950,26.0,48000,Diesel,90,0.0,0,2000,3,1165
4,4,13750,30.0,38500,Diesel,90,0.0,0,2000,3,1170


In [2]:
# a. Add a new column 'Revised' to the dataset specifying 5% increase in old Price
df['Revised'] = df['Price'] * 1.05  # Increase Price by 5%


In [3]:
# b. Create subset of cars’ data having Price greater than 15000 and Age less than 8
subset = df[(df['Price'] > 15000) & (df['Age'] < 8)]


In [4]:
# c. Sort observations in descending order of Revised Price
sorted_df = df.sort_values(by='Revised', ascending=False)


In [5]:
# d. Apply ZScore_normalization on HP column
# Convert HP column to numeric (if any non-numeric values exist)
df['HP'] = pd.to_numeric(df['HP'], errors='coerce')


In [6]:
# Handle missing values by filling with mean of the column (if any NaNs exist)
df['HP'].fillna(df['HP'].mean(), inplace=True)

# Z-Score Normalization (standardization)
df['HP_zscore'] = stats.zscore(df['HP'])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['HP'].fillna(df['HP'].mean(), inplace=True)


In [7]:
# Display the results after each operation
df.head(), subset.head(), sorted_df.head(), df[['HP', 'HP_zscore']].head()

(   Unnamed: 0  Price   Age     KM FuelType    HP  MetColor  Automatic    CC  \
 0           0  13500  23.0  46986   Diesel  90.0       1.0          0  2000   
 1           1  13750  23.0  72937   Diesel  90.0       1.0          0  2000   
 2           2  13950  24.0  41711   Diesel  90.0       NaN          0  2000   
 3           3  14950  26.0  48000   Diesel  90.0       0.0          0  2000   
 4           4  13750  30.0  38500   Diesel  90.0       0.0          0  2000   
 
    Doors  Weight  Revised  HP_zscore  
 0  three    1165  14175.0  -0.779131  
 1      3    1165  14437.5  -0.779131  
 2      3    1165  14647.5  -0.779131  
 3      3    1165  15697.5  -0.779131  
 4      3    1170  14437.5  -0.779131  ,
      Unnamed: 0  Price  Age     KM FuelType   HP  MetColor  Automatic    CC  \
 110         110  31000  4.0   4000   Diesel  116       1.0          0  2000   
 111         111  31275  4.0   1500   Diesel  116       1.0          0  2000   
 114         114  22950  7.0  10000  