In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

### Q1. Pandas version

In [2]:
pd.__version__

'2.3.1'

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv')

In [4]:
df.head()

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


### Q2. Records count

In [5]:
df.shape

(9704, 11)

### Q3. Fuel types

In [8]:
fuel_type_count = df['fuel_type'].value_counts()
print(fuel_type_count)

fuel_type
Gasoline    4898
Diesel      4806
Name: count, dtype: int64


### Q4. Missing values

In [9]:
missing_values = df.isnull().sum()
print(missing_values)

engine_displacement      0
num_cylinders          482
horsepower             708
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64


### Q5. Max fuel efficiency

In [13]:
max_fuel_efficiency_asia = df[df['origin'] == 'Asia']['fuel_efficiency_mpg'].max()
print(max_fuel_efficiency_asia)

23.759122836520497


### Q6. Median value of horsepower

In [16]:
# Find the median value of horsepower
median_before = df['horsepower'].median()
print(f"Median horsepower (before filling): {median_before}")

# Calculate the most frequent value (mode) of horsepower
most_frequent = df['horsepower'].mode()
print(f"Most frequent horsepower value: {most_frequent}")

# Fill missing values with the most frequent value 
df['horsepower'] = df['horsepower'].fillna(most_frequent)

# Calculate the median again (after filling)
median_after = df['horsepower'].median()
print(f"Median horsepower (after filling): {median_after}")

# Compare the medians
print(f"\nMedian before: {median_before}")
print(f"Median after: {median_after}")

if median_after > median_before:
    print("Answer: Yes, it increased")
elif median_after < median_before:
    print("Answer: Yes, it decreased")
else:
    print("Answer: No")

Median horsepower (before filling): 149.0
Most frequent horsepower value: 0    152.0
Name: horsepower, dtype: float64
Median horsepower (after filling): 149.0

Median before: 149.0
Median after: 149.0
Answer: No


### Q7. Sum of weights

In [17]:

# Select all cars from Asia
asia_cars = df[df['origin'] == 'Asia']

# Select only columns 'vehicle_weight' and 'model_year'
selected_columns = asia_cars[['vehicle_weight', 'model_year']]

# Select the first 7 values
first_7 = selected_columns.head(7)

# Get the underlying NumPy array
X = first_7.values
print("X shape:", X.shape)
print("X:\n", X)

# Compute XTX (X transpose times X)
XTX = np.dot(X.T, X)
print("\nXTX:\n", XTX)

# Invert XTX
XTX_inv = np.linalg.inv(XTX)
print("\nXTX inverse:\n", XTX_inv)

# Create array y
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])
print("\ny:", y)

# Multiply inverse of XTX with transpose of X, then multiply by y
# w = (XTX)^(-1) * X^T * y
w =  np.dot(np.dot(XTX_inv, X.T), y)
print("\nw:", w)

# Sum of all elements of w
sum_w = w.sum()
print(f"\nSum of all elements in w: {sum_w}")

X shape: (7, 2)
X:
 [[2714.21930965 2016.        ]
 [2783.86897424 2010.        ]
 [3582.68736772 2007.        ]
 [2231.8081416  2011.        ]
 [2659.43145076 2016.        ]
 [2844.22753389 2014.        ]
 [3761.99403819 2019.        ]]

XTX:
 [[62248334.33150762 41431216.5073268 ]
 [41431216.5073268  28373339.        ]]

XTX inverse:
 [[ 5.71497081e-07 -8.34509443e-07]
 [-8.34509443e-07  1.25380877e-06]]

y: [1100 1300  800  900 1000 1100 1200]

w: [0.01386421 0.5049067 ]

Sum of all elements in w: 0.5187709081074016


In [None]:
#These are the linear regression coefficients
# w[0] = 0.01386421 → coefficient for vehicle_weight
# w[1] = 0.5049067 → coefficient for model_year

In [18]:
# Example prediction
new_weight = 3000
new_year = 2015
prediction = 0.01386421 * new_weight + 0.5049067 * new_year
print(f"Predicted value: {prediction}")

Predicted value: 1058.9796305000002
