In [1]:
!pip install numpy pandas matplotlib seaborn


from pathlib import Path
from urllib.request import urlretrieve

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns



In [2]:
# Report the pandas release this notebook is running against.
pandas_version = pd.__version__
print("Pandas version:", pandas_version)

Pandas version: 2.2.2


In [3]:
# Remote source and local cache location of the car fuel-efficiency dataset.
DATA_URL = "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv"
DATA_PATH = Path("car_fuel_efficiency.csv")

# Download with the standard library instead of shelling out to `wget`, so the
# cell also works where `wget` is not installed and download errors raise
# instead of being silenced. The file is fetched only when it is not already
# present; delete the local copy to force a refresh.
if not DATA_PATH.exists():
    # Write to a temporary name first so an interrupted download never leaves
    # a truncated CSV behind under the final name.
    partial_path = DATA_PATH.with_name(DATA_PATH.name + ".part")
    urlretrieve(DATA_URL, partial_path)
    partial_path.replace(DATA_PATH)

df = pd.read_csv(DATA_PATH)
print("\nDataset loaded. Shape:", df.shape)


Dataset loaded. Shape: (9704, 11)


In [8]:
# Show the default head() preview of the loaded frame under a caption line.
head_rows = df.head()
print("First few rows of the dataset:", head_rows, sep="\n")

First few rows of the dataset:
   engine_displacement  num_cylinders  horsepower  vehicle_weight  \
0                  170            3.0       159.0     3413.433759   
1                  130            5.0        97.0     3149.664934   
2                  170            NaN        78.0     3079.038997   
3                  220            4.0         NaN     2542.392402   
4                  210            1.0       140.0     3460.870990   

   acceleration  model_year  origin fuel_type         drivetrain  num_doors  \
0          17.7        2003  Europe  Gasoline    All-wheel drive        0.0   
1          17.8        2007     USA  Gasoline  Front-wheel drive        0.0   
2          15.1        2018  Europe  Gasoline  Front-wheel drive        0.0   
3          20.2        2009     USA    Diesel    All-wheel drive        2.0   
4          14.4        2009  Europe  Gasoline    All-wheel drive        2.0   

   fuel_efficiency_mpg  
0            13.231729  
1            13.688217  
2   

In [4]:
# Row count of the loaded frame.
n_records = df.shape[0]
print("Number of records:", n_records)

Number of records: 9704


In [5]:
# Count the distinct (non-null) values in the fuel_type column.
n_fuel_types = df['fuel_type'].nunique()
print("Number of unique fuel types:", n_fuel_types)

Number of unique fuel types: 2


In [6]:
# Per-column flag: True when the column holds at least one missing value.
has_missing = df.isna().any()

# Keep the flagged column names, in the frame's column order.
missing_cols = [column for column, flagged in has_missing.items() if flagged]

print("Columns with missing values:", missing_cols)
print("Count:", len(missing_cols))

Columns with missing values: ['num_cylinders', 'horsepower', 'acceleration', 'num_doors']
Count: 4


In [9]:
# Restrict to rows whose origin is exactly 'Asia', then take the highest
# fuel efficiency among them.
is_asia = df['origin'].eq('Asia')
asia_cars = df.loc[is_asia]
max_eff = asia_cars['fuel_efficiency_mpg'].max()
print("Max fuel efficiency for Asia cars:", max_eff)

Max fuel efficiency for Asia cars: 23.759122836520497


In [10]:
# Median and most frequent value of horsepower on the untouched data.
horsepower = df['horsepower']
median_hp_before = horsepower.median()
mode_hp = horsepower.mode()[0]

# Build a separate frame with missing horsepower replaced by the mode;
# `df` itself is left unmodified.
filled_df = df.assign(horsepower=horsepower.fillna(mode_hp))
median_hp_after = filled_df['horsepower'].median()

# Compare the two medians and report whether imputation shifted it.
median_changed = "No" if median_hp_before == median_hp_after else "Yes"

print("Median horsepower before:", median_hp_before)
print("Mode horsepower:", mode_hp)
print("Median horsepower after filling:", median_hp_after)
print("Has median changed?", median_changed)

Median horsepower before: 149.0
Mode horsepower: 152.0
Median horsepower after filling: 152.0
Has median changed? Yes


In [11]:
# First seven Asia rows, restricted to the two feature columns.
asia_subset = asia_cars.iloc[:7][['vehicle_weight', 'model_year']]
X = asia_subset.to_numpy()

# Fixed target vector, one entry per selected row.
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])

# Normal-equation weights: w = (X^T X)^-1 X^T y, evaluated left to right.
XTX = np.dot(X.T, X)
XTX_inv = np.linalg.inv(XTX)
w = np.dot(np.dot(XTX_inv, X.T), y)

print("Sum of elements in w:", w.sum())

Sum of elements in w: 0.5187709081074016
