In [12]:
import pandas as pd
import numpy as np

# Location of the car fuel-efficiency CSV that the rest of the notebook analyses.
url = "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv"

# Read the CSV straight from the URL into a DataFrame.
df = pd.read_csv(url)

# Preview the first five rows (rendered as the cell's output).
df.head(n=5)

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


In [3]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.
# DataFrame.info() writes the report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line to
# the cell output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9704 entries, 0 to 9703
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   engine_displacement  9704 non-null   int64  
 1   num_cylinders        9222 non-null   float64
 2   horsepower           8996 non-null   float64
 3   vehicle_weight       9704 non-null   float64
 4   acceleration         8774 non-null   float64
 5   model_year           9704 non-null   int64  
 6   origin               9704 non-null   object 
 7   fuel_type            9704 non-null   object 
 8   drivetrain           9704 non-null   object 
 9   num_doors            9202 non-null   float64
 10  fuel_efficiency_mpg  9704 non-null   float64
dtypes: float64(6), int64(2), object(3)
memory usage: 834.1+ KB
None


In [4]:
# --- Q1: Pandas version ---
# NOTE: `sys` is imported here but not used anywhere in this cell.
import sys

# Report the version string of the pandas package loaded in this kernel.
print(f"Pandas version: {pd.__version__}")

Pandas version: 2.3.1


In [5]:
# --- Q2: Records count ---
# Number of rows in the dataset: the first component of the frame's shape.
q2 = df.shape[0]
print(f"Q2: Records count = {q2}")

Q2: Records count = 9704


In [6]:
# --- Q3: Fuel types ---
# Count the distinct fuel types; nunique() leaves NaN out of the count by default.
q3 = df["fuel_type"].nunique()
print(f"Q3: Fuel types = {q3}")

Q3: Fuel types = 2


In [7]:
# --- Q4: Missing values ---
# Per-column count of missing (NaN) entries.
missing_per_col = df.isna().sum()

# Keep only the columns that have at least one missing value.
cols_with_missing = missing_per_col[missing_per_col > 0]

# The answer is how many such columns there are.
q4 = int((missing_per_col > 0).sum())
print("Q4: # Columns with missing values =", q4)

# Print the header and the Series with separate calls. Passing both to a single
# print() after a trailing "\n" makes print insert its default separator (a
# space) in front of the Series, which shifts the first row out of alignment.
print("Missing per column:")
print(cols_with_missing)

Q4: # Columns with missing values = 4
Missing per column:
 num_cylinders    482
horsepower       708
acceleration     930
num_doors        502
dtype: int64


In [8]:
# --- Q5: Max fuel efficiency ---
# Boolean mask selecting the rows whose origin is Asia.
is_asia = df["origin"] == "Asia"

# Largest fuel efficiency among those rows.
q5 = df.loc[is_asia, "fuel_efficiency_mpg"].max()
print("Q5: Max fuel efficiency =", q5)

Q5: Max fuel efficiency = 23.759122836520497


In [9]:
# --- Q6: Median value of horsepower ---
horsepower = df["horsepower"]

# Median of the column as loaded, with missing values skipped.
median_before = horsepower.median(skipna=True)

# Most frequent value, ignoring NaN. mode() returns a Series, so the first
# entry is used; if it is empty, fall back to the original median.
mode_vals = horsepower.mode(dropna=True)
if mode_vals.empty:
    mode_hp = float(median_before)
else:
    mode_hp = float(mode_vals.iloc[0])

# Fill the gaps with that value (on a copy; df itself is not modified) and
# recompute the median.
hp_filled = horsepower.fillna(mode_hp)
median_after = hp_filled.median()

# Classify the change. "No" is the final fallback, so it is also the answer
# when neither strict comparison holds.
if median_after < median_before:
    q6 = "Yes, it decreased"
elif median_after > median_before:
    q6 = "Yes, it increased"
else:
    q6 = "No"

print(f"Q6: Median value of horsepower = {q6}")

Q6: Median value of horsepower = Yes, it increased


In [None]:
# --- Q7: Sum of weights ---
# Target values, one per selected row.
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200], dtype=float)

# First seven rows with origin Asia, restricted to the two feature columns.
asia = df.loc[df["origin"] == "Asia", ["vehicle_weight", "model_year"]].head(7)

# Feature matrix as a float NumPy array.
X = asia.to_numpy(dtype=float)

# Gram matrix X^T X and its inverse.
XTX = X.T @ X
XTX_inv = np.linalg.inv(XTX)

# Weights from the normal equation: w = (X^T X)^-1 X^T y.
XTy = X.T @ y
w = XTX_inv @ XTy

# The answer is the sum of the weight vector's entries.
q7 = float(w.sum())

print("Q7: Sum of weights =", q7)

Q7: Sum of weights = 0.5187709081074039
