In [1]:
import numpy as np
import pandas as pd

In [2]:
# Q1. Pandas version
# Show the installed pandas version string for reproducibility.
pd.__version__

'2.2.2'

In [3]:
# Read the laptops dataset (CSV in the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='laptops.csv')

In [4]:
# Q2. Records count
# Number of rows in the dataset (first entry of the frame's shape).
df.shape[0]

2160

In [5]:
# Preview the first five rows to get a feel for the columns.
df.head(n=5)

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


In [8]:
# Q3. Laptop brands
# Count the distinct values in the Brand column.
df["Brand"].nunique()

27

In [9]:
# df.Brand.value_counts()

In [10]:
# Q4. Missing values
# Per-column count of missing entries; columns with a non-zero count have gaps.
df.isna().sum()

Laptop             0
Status             0
Brand              0
Model              0
CPU                0
RAM                0
Storage            0
Storage type      42
GPU             1371
Screen             4
Touch              0
Final Price        0
dtype: int64

In [11]:
# Q5. Maximum final price
# Highest "Final Price" among rows whose Brand is Dell.
# Rows and column are selected in a single .loc call instead of chained indexing.
df.loc[df["Brand"] == "Dell", "Final Price"].max()

3936.0

In [12]:
# Q6. Median value of Screen
# Median of the Screen column over the whole dataset.
df["Screen"].median()

15.6

In [13]:
# Most frequent value of Screen: value_counts() orders by descending count,
# so the first index label is the most common value.
df.Screen.value_counts().index[0]

15.6

In [14]:
# Cross-check the most frequent Screen value using Series.mode().
df.Screen.mode()

0    15.6
Name: Screen, dtype: float64

In [15]:
# Impute missing Screen values with the column's most frequent value
# (the first label of value_counts()).
df['Screen'] = df.Screen.fillna(value=df.Screen.value_counts().index[0])

In [16]:
# Re-count missing values per column to confirm Screen no longer has gaps.
df.isna().sum()

Laptop             0
Status             0
Brand              0
Model              0
CPU                0
RAM                0
Storage            0
Storage type      42
GPU             1371
Screen             0
Touch              0
Final Price        0
dtype: int64

In [17]:
# Recompute the Screen median after the fill; compare with the earlier
# median to see whether the imputation changed it.
df["Screen"].median()

15.6

In [18]:
# Q7. Sum of weights
# Keep only the rows whose Brand is "Innjoo".
df[df.Brand.eq("Innjoo")]

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
1478,InnJoo Voom Excellence Intel Celeron N4020/8GB...,New,Innjoo,Voom,Intel Celeron,8,256,SSD,,15.6,No,311.37
1479,InnJoo Voom Excellence Pro Intel Celeron N4020...,New,Innjoo,Voom,Intel Celeron,8,512,SSD,,15.6,No,392.55
1480,Innjoo Voom Intel Celeron N3350/4GB/64GB eMMC/...,New,Innjoo,Voom,Intel Celeron,4,64,eMMC,,14.1,No,251.4
1481,Innjoo Voom Laptop Max Intel Celeron N3350/6GB...,New,Innjoo,Voom,Intel Celeron,6,64,eMMC,,14.1,No,383.61
1482,Innjoo Voom Laptop Pro Intel Celeron N3350/6GB...,New,Innjoo,Voom,Intel Celeron,6,128,SSD,,14.1,No,317.02
1483,Innjoo Voom Pro Intel Celeron N3350/6GB/128GB ...,New,Innjoo,Voom,Intel Celeron,6,128,eMMC,,14.1,No,431.38


In [19]:
# Restrict the Innjoo rows to the RAM, Storage and Screen columns,
# selecting rows and columns in one .loc call.
df.loc[df.Brand.eq("Innjoo"), ["RAM", "Storage", "Screen"]]

Unnamed: 0,RAM,Storage,Screen
1478,8,256,15.6
1479,8,512,15.6
1480,4,64,14.1
1481,6,64,14.1
1482,6,128,14.1
1483,6,128,14.1


In [20]:
# Extract the Innjoo RAM/Storage/Screen sub-frame as a plain NumPy array
# (one row per laptop, one column per feature).
X = df.loc[df.Brand.eq("Innjoo"), ["RAM", "Storage", "Screen"]].to_numpy()

In [21]:
# Display X, the array built from the Innjoo rows (columns: RAM, Storage, Screen).
X

array([[  8. , 256. ,  15.6],
       [  8. , 512. ,  15.6],
       [  4. ,  64. ,  14.1],
       [  6. ,  64. ,  14.1],
       [  6. , 128. ,  14.1],
       [  6. , 128. ,  14.1]])

In [23]:
# First step towards XTX (the product of X's transpose with X):
# take the transpose of X so rows become features and columns become laptops.
X_transpose = np.transpose(X)

In [24]:
# Display the transposed array.
X_transpose

array([[  8. ,   8. ,   4. ,   6. ,   6. ,   6. ],
       [256. , 512. ,  64. ,  64. , 128. , 128. ],
       [ 15.6,  15.6,  14.1,  14.1,  14.1,  14.1]])

In [25]:
# Compare shapes: the transpose swaps the two axes of X.
X.shape, X_transpose.shape

((6, 3), (3, 6))

In [26]:
XTX = X.dot(X_transpose)

In [27]:
XTX

array([[ 65843.36, 131379.36,  16635.96,  16651.96,  33035.96,  33035.96],
       [131379.36, 262451.36,  33019.96,  33035.96,  65803.96,  65803.96],
       [ 16635.96,  33019.96,   4310.81,   4318.81,   8414.81,   8414.81],
       [ 16651.96,  33035.96,   4318.81,   4330.81,   8426.81,   8426.81],
       [ 33035.96,  65803.96,   8414.81,   8426.81,  16618.81,  16618.81],
       [ 33035.96,  65803.96,   8414.81,   8426.81,  16618.81,  16618.81]])

In [28]:
XTX_2 = X_transpose.dot(X)

In [29]:
XTX_2

array([[2.52000e+02, 8.32000e+03, 5.59800e+02],
       [8.32000e+03, 3.68640e+05, 1.73952e+04],
       [5.59800e+02, 1.73952e+04, 1.28196e+03]])

In [30]:
XTX.shape, XTX_2.shape

((6, 6), (3, 3))

In [31]:
# Compute the inverse of XTX.
from numpy.linalg import inv
XTX_inv = inv(XTX)
XTX_inv_2 = inv(XTX_2)

In [32]:
XTX_inv

array([[ 1.24800297e+11,  1.02652371e+10,  9.19595127e+10,
         1.90495156e+11,  2.24035328e-05, -4.31888877e+11],
       [-1.05471416e+10,  2.96552205e+10,  1.30097558e+10,
         1.39761735e+11,  2.58838288e-05, -1.73912344e+11],
       [ 7.77893826e+10,  2.71798860e+10,  7.14684382e+10,
         2.24855756e+11, -2.98219776e-05, -4.12460406e+11],
       [ 8.42191802e+10,  1.62788196e+11,  1.68175235e+11,
         9.24437543e+11, -0.00000000e+00, -1.36589754e+12],
       [-2.88416309e+11, -2.34135397e+11, -3.55779885e+11,
        -1.51468390e+12,  2.44860610e+12,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -2.44860610e+12,  2.44860610e+12]])

In [33]:
XTX_inv.shape, XTX_inv_2.shape

((6, 6), (3, 3))

In [34]:
# Create an array y with values [1100, 1300, 800, 900, 1000, 1100].
y = np.array([1100, 1300, 800, 900, 1000, 1100])

In [35]:
# Multiply the inverse of XTX with the transpose of X, 
# and then multiply the result by y. Call the result w

XTX_inv.shape, X_transpose.shape, y.shape

((6, 6), (3, 6), (6,))

In [36]:
w = X_transpose.dot(XTX_inv).dot(y)

In [37]:
w

array([295.28808594,  17.578125  ,  36.68852523])

In [38]:
XTX_inv_2.shape, X_transpose.shape, y.shape

((3, 3), (3, 6), (6,))

In [39]:
w2 = XTX_inv_2.dot(X_transpose).dot(y)

In [40]:
w2

array([45.58076606,  0.42783519, 45.29127938])

In [41]:
# What's the sum of all the elements of the result?
w.sum(), w2.sum()

(349.5547361658441, 91.2998806299557)