In [1]:
### Uncomment the cell below and run it whenever Altair stops working, to reinstall the latest version

## For some reason, whenever the Jupyter server restarts, it
## reverts to the old version of Altair (4.2.2)

In [2]:
#pip install -U altair

In [3]:
## If the version string shown below is lower than 5.0.0,
## uncomment and run the pip install cell above
import altair as alt; alt.__version__

'5.1.2'

In [4]:
### Run this cell before continuing.

import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import set_config
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV, cross_validate, train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

# Turn off Altair's maximum-row check so charts can be built from larger dataframes
alt.data_transformers.disable_max_rows()

# Have scikit-learn transformers output pandas dataframes instead of arrays
set_config(transform_output="pandas")

# Helper for visualizing images
# code below sourced from: https://gist.github.com/daviddalpiaz/ae62ae5ccd0bada4b9acd6dbc9008706
def show_digit(arr784):
    """Draw a flattened 28x28 image in grayscale.

    The first element of ``arr784`` is skipped (presumably a label column --
    confirm against the data being passed in); the remaining values are
    reshaped to 28x28 and rendered with ``plt.imshow``.
    """
    values = np.array(arr784)
    pixels = values[1:]
    image = pixels.reshape(28, 28)
    plt.imshow(image, cmap="gray")
    
# Seed NumPy's global random generator so that later steps which draw from it
# (e.g. scikit-learn calls left at random_state=None) are repeatable on a fresh run.
np.random.seed(1137110237)  # randomly picked seed

In [5]:
# Load the raw laptop listings as a pandas dataframe
LAPTOP_DATA_URL = "https://raw.githubusercontent.com/fyip3/ds_project/main/data/laptopData.csv"
INR_TO_CAD = 0.017  # conversion factor applied to the INR prices
laptop_raw = pd.read_csv(LAPTOP_DATA_URL)

# Clean in one chain so the raw frame stays untouched and re-running the cell
# always produces the same result.
laptop_data = (
    laptop_raw
    .drop(columns=["Unnamed: 0", "TypeName"])   # discard these two columns
    .dropna()                                     # remove rows with any missing value
    .assign(
        # Keep only the first run of digits in Ram, dropping the non-numeric part
        # (values remain strings). The raw string avoids the invalid "\d" escape.
        Ram=lambda df: df["Ram"].str.extract(r"(\d+)", expand=False),
        # Convert Price from INR to CAD
        Price=lambda df: df["Price"] * INR_TO_CAD,
    )
    .rename(columns={
        "Inches": "ScreenSize_Inches",
        "Ram": "Memory_GB",
        "Memory": "Storage_And_Type",
        "Weight": "Weight_Kg",
        "Price": "Price_CAD",
    })
)


In [6]:
# Display the cleaned dataset
laptop_data

Unnamed: 0,Company,ScreenSize_Inches,ScreenResolution,Cpu,Memory_GB,Storage_And_Type,Gpu,OpSys,Weight_Kg,Price_CAD
0,Apple,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1213.437614
1,Apple,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,814.223894
2,HP,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,520.812000
3,Apple,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2298.320712
4,Apple,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1633.628736
...,...,...,...,...,...,...,...,...,...,...
1298,Lenovo,14,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,577.874880
1299,Lenovo,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1357.734240
1300,Lenovo,14,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,207.419040
1301,HP,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,692.000640


In [7]:
# Hold out 25% of the rows as the test set. The seed is passed explicitly so the
# split is identical every time this cell runs, rather than depending on how much
# of NumPy's global random state has already been consumed by earlier cells.
laptop_train, laptop_test = train_test_split(
    laptop_data,
    test_size=0.25,
    random_state=1137110237,  # same value as the notebook's np.random.seed call
)

In [8]:
# Show the first 10 rows of the training split
laptop_train.head(10)

Unnamed: 0,Company,ScreenSize_Inches,ScreenResolution,Cpu,Memory_GB,Storage_And_Type,Gpu,OpSys,Weight_Kg,Price_CAD
466,Acer,15.6,1366x768,Intel Core i3 6006U 2GHz,4,500GB HDD,Nvidia GeForce GTX 940MX,Windows 10,2.2kg,424.80144
1224,Dell,15.0,Full HD / Touchscreen 1920x1080,Intel Core i3 7100U 2.4GHz,4,500GB HDD,Intel HD Graphics 620,Windows 10,2.08kg,461.03184
240,Lenovo,15.6,1366x768,Intel Core i3 6006U 2GHz,8,128GB SSD,Intel HD Graphics 520,Windows 10,7.2kg,533.49264
757,HP,15.6,Full HD 1920x1080,Intel Core i7 6700HQ 2.6GHz,8,256GB SSD,Nvidia Quadro M1000M,Windows 7,2.59kg,1413.89136
147,Asus,15.6,Full HD 1920x1080,Intel Celeron Dual Core N3350 1.1GHz,4,1TB HDD,Intel HD Graphics 500,Windows 10,2kg,311.58144
950,HP,15.6,IPS Panel Full HD 1920x1080,Intel Core i7 6820HQ 2.7GHz,8,8GB SSD,Nvidia Quadro M1000M,Windows 10,2.0kg,2037.05424
641,HP,17.0,1600x900,AMD A9-Series 9420 3GHz,8,1TB HDD,AMD Radeon R5,Windows 10,2.6kg,471.810384
473,Dell,13.3,Full HD / Touchscreen 1920x1080,Intel Core i5 8250U 1.6GHz,8,256GB SSD,Intel UHD Graphics 620,Windows 10,1.42kg,1668.274056
1074,Lenovo,12.5,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,8,256GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1552.010702
488,Lenovo,12.5,IPS Panel Full HD 1920x1080,Intel Core i7 7500U 2.7GHz,16,512GB SSD,Intel HD Graphics 620,Windows 10,1.36kg,1628.55648


In [9]:
# Average training-set price (CAD) for each laptop brand.
# The price column is selected explicitly: the first positional parameter of
# GroupBy.mean() is `numeric_only`, not a list of columns to average.
laptop_brand_avg_price = (
    laptop_train.groupby("Company")["Price_CAD"]
        .mean()
        .reset_index(name="Average Price")
)

# Bar chart with one bar per brand, coloured by brand.
laptop_brand_plot = alt.Chart(laptop_brand_avg_price).mark_bar().encode(
    x=alt.X("Company")
        .title("Laptop Brand"),
    y=alt.Y("Average Price")
        .title("Average Price of Laptops"),
    color=alt.Color("Company")
            .scale(scheme="category20b")
).configure_axisX(labelAngle=-45)
laptop_brand_plot

In [10]:
# Average training-set price (CAD) for each screen size.
# The price column is selected explicitly: the first positional parameter of
# GroupBy.mean() is `numeric_only`, not a list of columns to average.
laptop_screen_size_avg_price = (
    laptop_train.groupby("ScreenSize_Inches")["Price_CAD"]
        .mean()
        .reset_index(name="Average Price")
)

# Bar chart with one bar per screen size, coloured by screen size.
laptop_screen_size_plot = alt.Chart(laptop_screen_size_avg_price).mark_bar().encode(
    x=alt.X("ScreenSize_Inches")
        .title("Screen Size in inches"),
    y=alt.Y("Average Price")
        .title("Average Price of Laptops"),
    color=alt.Color("ScreenSize_Inches")
            .scale(scheme="category20b")
).configure_axisX(labelAngle=-45)
laptop_screen_size_plot

In [11]:
# Average training-set price (CAD) for each screen-resolution description.
# The price column is selected explicitly: the first positional parameter of
# GroupBy.mean() is `numeric_only`, not a list of columns to average.
laptop_resolution_avg_price = (
    laptop_train.groupby("ScreenResolution")["Price_CAD"]
        .mean()
        .reset_index(name="Average Price")
)

# Bar chart with one bar per resolution description, coloured by the same field.
laptop_resolution_plot = alt.Chart(laptop_resolution_avg_price).mark_bar().encode(
    x=alt.X("ScreenResolution")
        .title("Screen Resolution"),
    y=alt.Y("Average Price")
        .title("Average Price of Laptops"),
    color=alt.Color("ScreenResolution")
            .scale(scheme="category20b")
).configure_axisX(labelAngle=-45)
laptop_resolution_plot

In [12]:
# Average training-set price (CAD) for each processor description.
# The price column is selected explicitly: the first positional parameter of
# GroupBy.mean() is `numeric_only`, not a list of columns to average.
laptop_cpu_avg_price = (
    laptop_train.groupby("Cpu")["Price_CAD"]
        .mean()
        .reset_index(name="Average Price")
)

# Bar chart with one bar per processor description, coloured by the same field.
laptop_cpu_plot = alt.Chart(laptop_cpu_avg_price).mark_bar().encode(
    x=alt.X("Cpu")
        .title("Processor"),
    y=alt.Y("Average Price")
        .title("Average Price of Laptops"),
    color=alt.Color("Cpu")
            .scale(scheme="category20b")
).configure_axisX(labelAngle=-45)
laptop_cpu_plot

In [13]:
# Average training-set price (CAD) for each installed-memory value.
# The price column is selected explicitly: the first positional parameter of
# GroupBy.mean() is `numeric_only`, not a list of columns to average.
laptop_ram_avg_price = (
    laptop_train.groupby("Memory_GB")["Price_CAD"]
        .mean()
        .reset_index(name="Average Price")
)

# Bar chart with one bar per memory value, coloured by the same field.
laptop_ram_plot = alt.Chart(laptop_ram_avg_price).mark_bar().encode(
    x=alt.X("Memory_GB")
        .title("Installed Memory"),
    y=alt.Y("Average Price")
        .title("Average Price of Laptops"),
    color=alt.Color("Memory_GB")
            .scale(scheme="category20b")
).configure_axisX(labelAngle=-45)
laptop_ram_plot

In [14]:
# Average training-set price (CAD) for each storage size/type description.
# The price column is selected explicitly: the first positional parameter of
# GroupBy.mean() is `numeric_only`, not a list of columns to average.
laptop_storage_avg_price = (
    laptop_train.groupby("Storage_And_Type")["Price_CAD"]
        .mean()
        .reset_index(name="Average Price")
)

# Bar chart with one bar per storage description, coloured by the same field.
laptop_storage_plot = alt.Chart(laptop_storage_avg_price).mark_bar().encode(
    x=alt.X("Storage_And_Type")
        .title("Storage Type"),
    y=alt.Y("Average Price")
        .title("Average Price of Laptops"),
    color=alt.Color("Storage_And_Type")
            .scale(scheme="category20b")
).configure_axisX(labelAngle=-45)
laptop_storage_plot

In [15]:
# Average training-set price (CAD) for each graphics-card description.
# The price column is selected explicitly: the first positional parameter of
# GroupBy.mean() is `numeric_only`, not a list of columns to average.
laptop_gpu_avg_price = (
    laptop_train.groupby("Gpu")["Price_CAD"]
        .mean()
        .reset_index(name="Average Price")
)

# Bar chart with one bar per graphics-card description, coloured by the same field.
laptop_gpu_plot = alt.Chart(laptop_gpu_avg_price).mark_bar().encode(
    x=alt.X("Gpu")
        .title("Graphics Card"),
    y=alt.Y("Average Price")
        .title("Average Price of Laptops"),
    color=alt.Color("Gpu")
            .scale(scheme="category20b")
).configure_axisX(labelAngle=-45)
laptop_gpu_plot

In [16]:
# Average training-set price (CAD) for each operating system.
# The price column is selected explicitly: the first positional parameter of
# GroupBy.mean() is `numeric_only`, not a list of columns to average.
laptop_os_avg_price = (
    laptop_train.groupby("OpSys")["Price_CAD"]
        .mean()
        .reset_index(name="Average Price")
)

# Bar chart with one bar per operating system, coloured by operating system.
laptop_os_plot = alt.Chart(laptop_os_avg_price).mark_bar().encode(
    x=alt.X("OpSys")
        .title("Operating System"),
    y=alt.Y("Average Price")
        .title("Average Price of Laptops"),
    color=alt.Color("OpSys")
            .scale(scheme="category20b")
).configure_axisX(labelAngle=-45)
laptop_os_plot