**Set up the environment**

In [1]:
!pip install pandas numpy matplotlib seaborn



In [2]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

**Q1. Pandas version**

In [3]:
# Q1: report the version string of the imported pandas package.
pandas_version = pd.__version__
print(pandas_version)

2.2.2


**Getting the data**

In [4]:
!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv

--2025-09-29 11:39:21--  https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 874188 (854K) [text/plain]
Saving to: ‘car_fuel_efficiency.csv’


2025-09-29 11:39:21 (126 MB/s) - ‘car_fuel_efficiency.csv’ saved [874188/874188]



In [5]:
# Load the downloaded CSV (relative path, i.e. the current working directory) into a DataFrame.
csv_path = "car_fuel_efficiency.csv"
data_frame = pd.read_csv(csv_path)


**Q2. Records count**

How many records are in the dataset?

- 4704
- 8704
- 9704
- 17704

In [6]:
# Q2: the number of records is the number of rows in the frame.
num_records = len(data_frame)
num_records

9704

**Q3. Fuel types**

How many fuel types are present in the dataset?

- 1
- 2
- 3
- 4

In [7]:
# Q3: count the distinct values in the `fuel_type` column.
# Note: despite its name, `fuel_type` holds the *number* of distinct fuel types.
fuel_column = data_frame["fuel_type"]
fuel_type = fuel_column.nunique()
print(fuel_type)

2


**Q4. Missing values**

How many columns in the dataset have missing values?

- 0
- 1
- 2
- 3
- 4

In [8]:
# Q4: per-column count of missing entries.
# Every column with a non-zero count below is a column that has missing values.
missing_values = data_frame.isna().sum()
print(missing_values)

engine_displacement      0
num_cylinders          482
horsepower             708
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64


**Q5. Max fuel efficiency**

What's the maximum fuel efficiency of cars from Asia?

- 13.75
- 23.75
- 33.75
- 43.75

In [9]:
# Q5: restrict the data to rows whose `origin` is "Asia" ...
is_asian = data_frame["origin"] == "Asia"
Asian_cars = data_frame[is_asian]

# ... and report the largest fuel efficiency among them, rounded to two decimals.
max_fuel_efficiency = Asian_cars["fuel_efficiency_mpg"].max()
print(round(max_fuel_efficiency, 2))

23.76


**Q6. Median value of horsepower**



1. Find the median value of `horsepower` column in the dataset.
2. Next, calculate the most frequent value of the same `horsepower` column.
3. Use `fillna` method to fill the missing values in `horsepower` column with the most frequent value from the previous step.
4. Now, calculate the median value of `horsepower` once again.

Has it changed?


- Yes, it increased
- Yes, it decreased
- No

In [11]:
# Q6, steps 1-2: statistics of `horsepower` computed BEFORE any missing values are filled.
horsepower = data_frame["horsepower"]

# Step 1: median of the column.
median_val = horsepower.median()
print(median_val)

# Step 2: most frequent value. `mode()` returns a Series, so take its first entry.
most_frequent_value = horsepower.mode().iloc[0]
print(most_frequent_value)

149.0
152.0


In [12]:
# Q6, step 3: replace the missing `horsepower` entries with the most frequent value.
# NOTE: this overwrites the column in `data_frame`, so the "before" median must be
# computed before this cell is run; re-running the earlier cell afterwards would
# report the post-fill median instead.
horsepower_filled = data_frame["horsepower"].fillna(most_frequent_value)
data_frame["horsepower"] = horsepower_filled

In [14]:
# Sanity check: number of missing `horsepower` entries remaining after the fill.
remaining_missing = data_frame["horsepower"].isna().sum()
print(remaining_missing)

0


In [15]:
# Q6, step 4: recompute the median on the filled column and compare it with the
# median obtained before filling (`median_val`).
median_value_after_filling = data_frame["horsepower"].median()
print(median_value_after_filling)

median_unchanged = median_val == median_value_after_filling
print("No, it hasn't changed" if median_unchanged else "Yes, it has changed")


152.0
Yes, it has changed


**Q7. Sum of weights**

1. Select all the cars from Asia
2. Select only columns `vehicle_weight` and `model_year`
3. Select the first 7 values
4. Get the underlying NumPy array. Let's call it `X`.
5. Compute matrix-matrix multiplication between the transpose of `X` and `X`. To get the transpose, use `X.T`. Let's call the result `XTX`.
6. Invert `XTX`.
7. Create an array `y` with values `[1100, 1300, 800, 900, 1000, 1100, 1200]`.
8. Multiply the inverse of `XTX` with the transpose of `X`, and then multiply the result by `y`. Call the result `w`.
9. What's the sum of all the elements of the result?

> **Note**: You just implemented linear regression. We'll talk about it in the next lesson.

- 0.051
- 0.51
- 5.1
- 51


In [17]:
# Q7, steps 1-3.
# Step 1: keep only the cars whose `origin` is "Asia".
asian_cars = data_frame[data_frame["origin"] == "Asia"]

# Steps 2-3: keep the two columns of interest and take the first 7 rows.
df_selected = asian_cars[["vehicle_weight", "model_year"]].head(7)
print(df_selected)

    vehicle_weight  model_year
8      2714.219310        2016
12     2783.868974        2010
14     3582.687368        2007
20     2231.808142        2011
21     2659.431451        2016
34     2844.227534        2014
38     3761.994038        2019


In [18]:
# Q7, step 4: the selected rows as a plain NumPy array, called X.
X = np.asarray(df_selected)
print(X)


[[2714.21930965 2016.        ]
 [2783.86897424 2010.        ]
 [3582.68736772 2007.        ]
 [2231.8081416  2011.        ]
 [2659.43145076 2016.        ]
 [2844.22753389 2014.        ]
 [3761.99403819 2019.        ]]


In [19]:
# Q7, step 5: matrix product of X transposed with X, called XTX.
XTX = X.T @ X
print(XTX)

[[62248334.33150762 41431216.5073268 ]
 [41431216.5073268  28373339.        ]]


In [20]:
#6. Invert XTX.
# XTX is a 2x2 matrix here (X has two columns), so its inverse is 2x2 as well.

XTX_inverse = np.linalg.inv(XTX)
print(XTX_inverse)

[[ 5.71497081e-07 -8.34509443e-07]
 [-8.34509443e-07  1.25380877e-06]]


In [21]:
# Q7, step 7: the vector y, holding one value for each of the 7 rows of X.
y = np.asarray([1100, 1300, 800, 900, 1000, 1100, 1200])
print(y)

[1100 1300  800  900 1000 1100 1200]


In [22]:
# Q7, step 8: w = (XTX_inverse . X^T) . y
# `@` associates left to right, so the inverse is multiplied by X.T first and the
# result is then multiplied by y, exactly as the step describes.
w = XTX_inverse @ X.T @ y
print(w)

[0.01386421 0.5049067 ]


In [23]:
# Q7, step 9: add up all the elements of w and show the total rounded to two decimals.
sum_of_elements = w.sum()
print(round(sum_of_elements, 2))

0.52
