In [2]:
import numpy as np
import pandas as pd

## Pandas version

In [3]:
# Display the installed pandas version string.
pd.__version__

'2.2.2'

## Getting the data

In [15]:
# Location of the laptops dataset (a CSV file hosted on GitHub).
url = "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv"
# Read the CSV directly from the URL into a DataFrame.
df = pd.read_csv(url)


### Records count

How many records are in the dataset?

In [85]:
# Number of rows (records) in the DataFrame.
len(df)

2160

### Laptop brands

How many laptop brands are represented in the dataset?

In [17]:
# Preview the first rows to see which columns are available.
df.head()

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


In [20]:
# Count the distinct values in the Brand column.
df['Brand'].nunique()

27

### Missing values

How many columns in the dataset have missing values?

In [24]:
# Count missing cells per column once, then keep only the columns
# that have at least one missing value.
missing_per_column = df.isnull().sum()
missing_per_column[missing_per_column > 0]

Storage type      42
GPU             1371
Screen             4
dtype: int64

### Maximum final price

What's the maximum final price of Dell notebooks in the dataset?


In [28]:
# Select the 'Final Price' of the Dell rows in one .loc call and take the maximum.
df.loc[df['Brand'] == 'Dell', 'Final Price'].max()

3936.0

###  Median value of Screen

1. Find the median value of Screen column in the dataset.
2. Next, calculate the most frequent value of the same Screen column.
3. Use fillna method to fill the missing values in Screen column with the most frequent value from the previous step.
4. Now, calculate the median value of Screen once again.

Has it changed?

In [33]:
# Median of the Screen column before any missing values are filled.
median_screen = df['Screen'].median()
median_screen

15.6

In [35]:
# Frequency of each Screen value, most common first.
df['Screen'].value_counts()

Screen
15.60    1009
14.00     392
16.00     174
17.30     161
13.30     131
16.10      48
17.00      33
13.00      27
15.00      21
13.50      19
13.40      19
11.60      16
14.20      14
12.30      13
14.10      11
13.60      11
16.20      10
15.30       8
10.50       7
14.40       6
12.40       6
15.40       5
12.00       4
18.00       3
14.50       3
13.90       2
12.50       1
10.10       1
10.95       1
Name: count, dtype: int64

In [36]:
# Most frequent Screen value(s). mode() returns a Series because
# several values can tie for the highest frequency.
mode_screen = df['Screen'].mode()
mode_screen

0    15.6
Name: Screen, dtype: float64

In [37]:
# `mode_screen` is a Series (index 0 -> most frequent value). Passing a Series
# to fillna aligns on index labels, so only a NaN at row 0 would be filled and
# the other missing rows would stay NaN. Take the scalar mode instead.
# The result is assigned back rather than using inplace=True on a column
# selection, which pandas treats as chained assignment.
df['Screen'] = df['Screen'].fillna(mode_screen.iloc[0])

In [38]:
# Recompute the median of Screen after the fill step.
df['Screen'].median()

15.6

The median stayed the same (15.6).

### Sum of weights

1. Select all the "Innjoo" laptops from the dataset.
2. Select only columns RAM, Storage, Screen.
3. Get the underlying NumPy array. Let's call it X.
4. Compute matrix-matrix multiplication between the transpose of X and X. To get the transpose, use X.T. Let's call the result XTX.
5. Compute the inverse of XTX.
6. Create an array y with values [1100, 1300, 800, 900, 1000, 1100].
7. Multiply the inverse of XTX with the transpose of X, and then multiply the result by y. Call the result w.
8. What's the sum of all the elements of the result?

In [45]:
# Keep only the rows whose Brand is exactly 'Innjoo'.
is_innjoo = df['Brand'] == 'Innjoo'
innjoo_laptops = df.loc[is_innjoo]
innjoo_laptops

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
1478,InnJoo Voom Excellence Intel Celeron N4020/8GB...,New,Innjoo,Voom,Intel Celeron,8,256,SSD,,15.6,No,311.37
1479,InnJoo Voom Excellence Pro Intel Celeron N4020...,New,Innjoo,Voom,Intel Celeron,8,512,SSD,,15.6,No,392.55
1480,Innjoo Voom Intel Celeron N3350/4GB/64GB eMMC/...,New,Innjoo,Voom,Intel Celeron,4,64,eMMC,,14.1,No,251.4
1481,Innjoo Voom Laptop Max Intel Celeron N3350/6GB...,New,Innjoo,Voom,Intel Celeron,6,64,eMMC,,14.1,No,383.61
1482,Innjoo Voom Laptop Pro Intel Celeron N3350/6GB...,New,Innjoo,Voom,Intel Celeron,6,128,SSD,,14.1,No,317.02
1483,Innjoo Voom Pro Intel Celeron N3350/6GB/128GB ...,New,Innjoo,Voom,Intel Celeron,6,128,eMMC,,14.1,No,431.38


In [55]:
# Restrict the Innjoo rows to the three numeric feature columns.
innjoo_laptops[['RAM', 'Storage', 'Screen']]

Unnamed: 0,RAM,Storage,Screen
1478,8,256,15.6
1479,8,512,15.6
1480,4,64,14.1
1481,6,64,14.1
1482,6,128,14.1
1483,6,128,14.1


In [58]:
# Underlying NumPy array of the RAM / Storage / Screen columns for the Innjoo rows.
feature_columns = ['RAM', 'Storage', 'Screen']
X = innjoo_laptops[feature_columns].to_numpy()
X

array([[  8. , 256. ,  15.6],
       [  8. , 512. ,  15.6],
       [  4. ,  64. ,  14.1],
       [  6. ,  64. ,  14.1],
       [  6. , 128. ,  14.1],
       [  6. , 128. ,  14.1]])

In [59]:
# Transpose of X: one row per feature column, one column per laptop.
# (The product of X.T and X itself is computed in the following cell.)
X.T

array([[  8. ,   8. ,   4. ,   6. ,   6. ,   6. ],
       [256. , 512. ,  64. ,  64. , 128. , 128. ],
       [ 15.6,  15.6,  14.1,  14.1,  14.1,  14.1]])

In [74]:
# Matrix product of the transpose of X with X.
XTX = X.T @ X
XTX

array([[2.52000e+02, 8.32000e+03, 5.59800e+02],
       [8.32000e+03, 3.68640e+05, 1.73952e+04],
       [5.59800e+02, 1.73952e+04, 1.28196e+03]])

In [76]:
# Compute the inverse of XTX.
# np.linalg.inv raises LinAlgError if the matrix is singular.
XTX_inv = np.linalg.inv(XTX)
XTX_inv

array([[ 2.78025381e-01, -1.51791334e-03, -1.00809855e-01],
       [-1.51791334e-03,  1.58286725e-05,  4.48052175e-04],
       [-1.00809855e-01,  4.48052175e-04,  3.87214888e-02]])

In [77]:
# Target vector y: six values, one per Innjoo laptop row.
target_values = [1100, 1300, 800, 900, 1000, 1100]
y = np.array(target_values)
y

array([1100, 1300,  800,  900, 1000, 1100])

In [83]:
# w = inverse(XTX) times transpose(X) times y, evaluated left to right.
w = XTX_inv @ X.T @ y
w

array([45.58076606,  0.42783519, 45.29127938])

In [86]:
# What's the sum of w?
# Python's built-in sum adds the array elements one by one, in order.
sum(w)

91.29988062995588