In [None]:
import pandas as pd 
import numpy as np

# read in all our data
laptop_train = pd.read_csv("datasets/laptops_train.csv")
laptop_test = pd.read_csv("datasets/laptops_test.csv")

# set seed for reproducibility
np.random.seed(0)

In [None]:
print(laptop_train.head())
print(laptop_test.head())

In [None]:
frames = [laptop_train,laptop_test]
df = pd.concat(frames)

In [None]:
df.head()

In [None]:
# get the number of missing data points per column
missing_values_count = df.isnull().sum()
missing_values_count

In [None]:
df['Operating System Version'].value_counts()

In [None]:
df[df['Operating System Version'].isna()].groupby('Operating System').groups

In [None]:
grouped = df.groupby('Operating System')

for name,group in grouped:
    print(name)
    print(group['Operating System Version'].unique())

In [None]:
#Drop the Operating System Version because the laptop can just be updated by the user
df = df.drop('Operating System Version', axis=1)

In [None]:
df.columns

In [None]:
df[['Screen Size','Screen']]

Data Preprocessing
Course of Action:
Manufacturer         Target Encoding
Model Name           Split on the () into two columns- Target Encoding
Category             One Hot Encoding
Screen Size          Convert to float and scale (units is inches)
Screen               Split into new features - screen type, screen quality, HD(Binary), Touchscreen(Binary)
CPU                  Split into new features - Brand, Model Number, Speed
RAM                  Remove "GB" and scale
Storage              Convert to Total Storage column and scale
GPU                  Split into new features - Brand, Model Number, Speed
Operating System     One Hot Encoding (macOS = MacOS)
Weight               Convert to lbs and scale (4s = 4.04 typo)
Price                Convert to USD

In [None]:
#Course of Action:
#Manufacturer         Target Encoding
#Model Name           Split on the () into two columns- Target Encoding
#Category             One Hot Encoding
#Screen Size          Convert to float and scale (units is inches)
#Screen               Split into new features - screen type, screen quality, HD(Binary), Touchscreen(Binary)
#CPU                  Split into new features - Brand, Model Number, Speed
#RAM                  Remove "GB" and scale
#Storage              Convert to Total Storage column and scale
#GPU                  Split into new features - Brand, Model Number, Speed
#Operating System     One Hot Encoding (macOS = MacOS)
#Weight               Convert to lbs and scale (4s = 4.04 typo)
#Price                Convert to USD

In [None]:
#Treat Manufacturer as an nominal variable
df['Manufacturer'].unique()

In [None]:
# gives a tuple of column name and series
# for each column in the dataframe
for (columnName, columnData) in df.items():
    print('Column Name : ', columnName)
    print('Column Contents : ', columnData.unique())

In [None]:
df.dtypes

In [None]:
model_name = df['Model Name'].unique()
model_name.sort()
model_name

In [None]:
df[['Model Name','CPU']]

In [None]:
df['Model Name'].str.contains('/')

In [None]:
print('Yoga' in df['Model Name'].unique())

In [None]:
df[df['Model Name'].str.contains('/')]

In [None]:
category = df['Category'].unique()
category.sort()
category

In [None]:
# gives a tuple of column name and series
# for each column in the dataframe
for (columnName, columnData) in df.items():
    print('Column Name : ', columnName)
    print('Column Contents : ', columnData.nunique())

In [None]:
screen_size = df['Screen Size'].unique()
screen_size.sort()
screen_size

In [None]:
screen = df['Screen'].unique()
screen.sort()
print(df['Screen'].value_counts())
screen

In [None]:
CPU = df['CPU'].unique()
CPU.sort()
print(df['CPU'].value_counts())
CPU

In [None]:
RAM = df['RAM'].unique()
RAM.sort()
print(df['RAM'].value_counts())
RAM

In [None]:
storage = df[' Storage'].unique()
storage.sort()
print(df[' Storage'].value_counts())
storage

In [None]:
GPU = df['GPU'].unique()
GPU.sort()
print(df['GPU'].value_counts())
GPU

In [None]:
OS = df['Operating System'].unique()
OS.sort()
print(df['Operating System'].value_counts())
OS

In [None]:
weight = df['Weight'].unique()
weight.sort()
print(df['Weight'].value_counts())
weight

In [None]:
price = df['Price'].unique()
price.sort()
print(df['Price'].value_counts())
price

In [None]:
df.columns

In [None]:
#1 INR = 0.012203 USD Conversion Rate as of May 10,2023
df['Price_USD'] = df.Price/81.9433
df.Price_USD

In [None]:
df[['Price','Price_USD']]

In [None]:
#Target Encoding for Manfacturer


In [None]:
df['Weight_LBS'] = df.Weight*2.204623
df.Weight_LBS

In [None]:
df.Weight = df.Weight.str.replace('kg','')

In [None]:
df.Weight = df.Weight.astype(float)

In [None]:
#4s is a typo. Google search the weight of the laptop
df.Weight = df.Weight.replace('4s','4.04')

In [None]:
df[['Weight','Weight_LBS']]

In [None]:
#Operating System Typo
df['Operating System'] = df['Operating System'].replace('macOS','Mac OS')

In [None]:
df['RAM']

In [None]:
#Clearing GB from string and converting to int
df.RAM = df.RAM.str.replace('GB','')
df.RAM = df.RAM.astype(int)

In [None]:
df.info()

In [None]:
#Remove Leading Space in Column Name
df.rename(columns = {' Storage':'Storage'}, inplace = True)

In [None]:
df['Screen Size']

In [None]:
#Clearing " from string and converting to float
df['Screen Size'] = df['Screen Size'].str.replace('"','')
df['Screen Size'] = df['Screen Size'].astype(float)

In [None]:
df.info()

In [None]:
""" Pattern of the CPU string Brand, Model Number, and Speed
Since the brand and model number vary in length
The string will be flipped to take the Speed then
take the brand from the front of the string """
CPU_df = df[['CPU']].copy()
# Python code
# To reverse words in a given string
CPU_reversed = []
# input string
for x in CPU_df.CPU:
    # reversing words in a given string
    s = x.split()[::-1]
    l = []
    for i in s:
        # appending reversed words to l
        l.append(i)
    # printing reverse words
    CPU_reversed.append(" ".join(l))

In [None]:
#Creating New feature - CPU Speed
CPU_df['CPU_reversed'] = CPU_reversed
new = CPU_df['CPU_reversed'].str.split(' ', expand=True, n=2)
df['CPU_Speed'] = new[0]
df['CPU_Speed'] = df['CPU_Speed'].str.replace('GHz','')
df['CPU_Speed'] = df['CPU_Speed'].astype(float)
new['CPU_Flipped'] = new[1] + " " +new[2]

In [None]:
# Python code
# To reverse words in a given string
CPU_flipped = []
# input string
for x in new.CPU_Flipped:
    # reversing words in a given string
    s = x.split()[::-1]
    l = []
    for i in s:
        # appending reversed words to l
        l.append(i)
    # printing reverse words
    CPU_flipped.append(" ".join(l))

In [None]:
#Creating New features - CPU Brand and Model
new['CPU_flipped'] = CPU_flipped
new2 = new['CPU_flipped'].str.split(' ', expand=True, n=1)
df['CPU Brand'] = new2[0]
df['CPU Model'] = new2[1]

In [None]:
#Exploring new features
CPU_speed = df['CPU_Speed'].unique()
CPU_speed.sort()
print(df['CPU_Speed'].value_counts())
CPU_speed

In [None]:
df.info()

In [None]:
#Exploring new features
CPU_brand = df['CPU Brand'].unique()
CPU_brand.sort()
print(df['CPU Brand'].value_counts())
CPU_brand

In [None]:
#Exploring new features
CPU_model = df['CPU Model'].unique()
CPU_model.sort()
print(df['CPU Model'].value_counts())
CPU_model

In [None]:
#Creating new features - GPU Brand and Model from GPU
new3 = df['GPU'].str.split(' ', expand=True, n=1)
df['GPU Brand'] = new3[0]
df['GPU Model'] = new3[1]

In [None]:
GPU_model = df['GPU Model'].unique()
GPU_model.sort()
print(df['GPU Model'].value_counts())
GPU_model

In [None]:
GPU_brand = df['GPU Brand'].unique()
GPU_brand.sort()
print(df['GPU Brand'].value_counts())
GPU_brand

In [None]:
#Some of the Model Names had redundant information captured in other columns
new4 = df['Model Name'].str.split('(', expand=True, n=1)
df['Model Name Cleaned'] = new4[0]

In [None]:
model_name_c = df['Model Name Cleaned'].unique()
model_name_c.sort()
print(df['Model Name Cleaned'].value_counts())
model_name_c

In [None]:
#Creating new feature of whether the screen is touchscreen
screen_touch = []

for x in df.Screen:
    if 'Touchscreen' in x:
        screen_touch.append(1)
    else:
        screen_touch.append(0)

In [None]:
#Creating new feature of whether the screen is hd
screen_hd = []

for x in df.Screen:
    if 'HD' in x:
        screen_hd.append(1)
    else:
        screen_hd.append(0)

In [None]:
df['Touchscreen'] = screen_touch
df['Screen_HD'] = screen_hd

In [None]:
new5 = df['Screen'].str.split(' ', expand=True)
new5 = new5.fillna('0')

In [None]:
#Creating new feature - screen quality from Screen column
screen_quality = []
for index,row in new5.iterrows():
    screen_quality.append(row[row.str.contains('x')].values[0])
 #       screen_quality.append(row[index])
  #  else:
   #     screen_quality.append(0)

In [None]:
df['Screen Quality'] = screen_quality

In [None]:
screen_quality_cleaned = df['Screen Quality'].unique()
screen_quality_cleaned.sort()
print(df['Screen Quality'].value_counts())
screen_quality_cleaned

In [None]:
df.Screen

In [None]:
""" Certain rows have multiple storage types and sizes.
Combining multiple storage types into total storage in the TB units """
new6 = df['Storage'].str.split(' ', expand=True)
storage = []
for index,row in new6.iterrows():
    storage.append(row[row.str.contains('B')].values)
new6 = new6.fillna('0')

In [None]:
storage_df = pd.DataFrame(storage)

In [None]:
storage_df = storage_df.fillna('0GB')

In [None]:
#*(1024*1024*1024)
#*(1024*1024*1024*1024)
col_0 = []
for val in storage_df[0]:
    if 'GB' in val:
        col_0.append(int(re.findall(r'\d+',val)[0])*(1024*1024*1024))
    elif 'TB' in val:
        col_0.append(int(re.findall(r'\d+',val)[0])*(1024*1024*1024*1024))
    else:
        print('Nothing')

In [None]:
col_1 = []
for val in storage_df[1]:
    if 'GB' in val:
        col_1.append(int(re.findall(r'\d+',val)[0])*(1024*1024*1024))
    elif 'TB' in val:
        col_1.append(int(re.findall(r'\d+',val)[0])*(1024*1024*1024*1024))
    else:
        print('Nothing')

In [None]:
res_list = [col_0[i] + col_1[i] for i in range(len(col_0))]
total_storage_list = [x/(1024*1024*1024*1024) for x in res_list]
df['Total Storage in TB'] = total_storage_list

In [None]:
df[['Storage','Total Storage in TB']]

In [None]:
df.to_csv('datasets/laptops_preprocessed.csv',index=True)

In [30]:
import pandas as pd 
import numpy as np

df = pd.read_csv('datasets/laptops_preprocessed.csv',index_col=0)
df.head()


Unnamed: 0,Manufacturer,Model Name,Model Name Cleaned,Category,Screen Size,Screen,Touchscreen,Screen_HD,Screen Quality,CPU,...,Storage,Total Storage in TB,GPU,GPU Brand,GPU Model,Operating System,Weight,Weight_LBS,Price,Price_USD
0,Apple,MacBook Pro,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,0,0,2560x1600,Intel Core i5 2.3GHz,...,128GB SSD,0.125,Intel Iris Plus Graphics 640,Intel,Iris Plus Graphics 640,Mac OS,1.37,3.020334,11912523.48,145375.1982
1,Apple,Macbook Air,Macbook Air,Ultrabook,13.3,1440x900,0,0,1440x900,Intel Core i5 1.8GHz,...,128GB Flash Storage,0.125,Intel HD Graphics 6000,Intel,HD Graphics 6000,Mac OS,1.34,2.954195,7993374.48,97547.6272
2,HP,250 G6,250 G6,Notebook,15.6,Full HD 1920x1080,0,1,1920x1080,Intel Core i5 7200U 2.5GHz,...,256GB SSD,0.25,Intel HD Graphics 620,Intel,HD Graphics 620,No OS,1.86,4.100599,5112900.0,62395.58329
3,Apple,MacBook Pro,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,0,0,2880x1800,Intel Core i7 2.7GHz,...,512GB SSD,0.5,AMD Radeon Pro 455,AMD,Radeon Pro 455,Mac OS,1.83,4.03446,22563005.4,275348.9962
4,Apple,MacBook Pro,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,0,0,2560x1600,Intel Core i5 3.1GHz,...,256GB SSD,0.25,Intel Iris Plus Graphics 650,Intel,Iris Plus Graphics 650,Mac OS,1.37,3.020334,16037611.2,195715.9548


In [None]:
#apply(lambda x: int(x.split(' ')[0]))
#Example of cleaner way to create new features

In [15]:
objList = df.select_dtypes(include = "object").columns
print (objList)

Index(['Manufacturer', 'Model Name', 'Model Name Cleaned', 'Category',
       'Screen', 'Screen Quality', 'CPU', 'CPU Brand', 'CPU Model', 'Storage',
       'GPU', 'GPU Brand', 'GPU Model', 'Operating System'],
      dtype='object')


In [16]:
#Label Encoding for object to numeric conversion
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

for feat in objList:
    df[feat] = le.fit_transform(df[feat].astype(str))

print (df.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1302 entries, 0 to 1301
Data columns (total 24 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Manufacturer         1302 non-null   int32  
 1   Model Name           1302 non-null   int32  
 2   Model Name Cleaned   1302 non-null   int32  
 3   Category             1302 non-null   int32  
 4   Screen Size          1302 non-null   float64
 5   Screen               1302 non-null   int32  
 6   Touchscreen          1302 non-null   int64  
 7   Screen_HD            1302 non-null   int64  
 8   Screen Quality       1302 non-null   int32  
 9   CPU                  1302 non-null   int32  
 10  CPU Brand            1302 non-null   int32  
 11  CPU Model            1302 non-null   int32  
 12  CPU_Speed            1302 non-null   float64
 13  RAM                  1302 non-null   int64  
 14  Storage              1302 non-null   int32  
 15  Total Storage in TB  1302 non-null   f

In [17]:
#Saving df after LabelEncoder is applied
df.to_csv('datasets/laptops_afterlabelencoder.csv',index=True)

In [18]:
#Choosing the features to be used in the model
X = df.drop(['Model Name','Screen','CPU','Storage','GPU','Weight','Price','Price_USD'],axis='columns')

In [19]:
#Choosing target for the model
y = df.Price_USD
y.head()

0    145375.19820
1     97547.62720
2     62395.58329
3    275348.99620
4    195715.95480
Name: Price_USD, dtype: float64

In [20]:
#Splitting train and test data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=43)

In [31]:
#Deciding on best model for the data
from sklearn.metrics import explained_variance_score,mean_absolute_error,r2_score
from time import time

from sklearn.linear_model import LinearRegression, Ridge,Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score

regressors = [
    KNeighborsRegressor(),
    GradientBoostingRegressor(),
    KNeighborsRegressor(),
    ExtraTreesRegressor(),
    RandomForestRegressor(),
    DecisionTreeRegressor(),
    LinearRegression(),
    Lasso(),
    Ridge()
]

cv = ShuffleSplit(n_splits=5,test_size=0.2, random_state=0)
head = 10
for model in regressors[:head]:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test) 
    print(model)
    print("\tExplained variance:", explained_variance_score(y_test, y_pred))
    print("\tMean absolute error:", mean_absolute_error(y_test, y_pred))
    print("\tR2 score:", r2_score(y_test, y_pred))
    print(cross_val_score(model,X,y,cv=cv))
    print()

KNeighborsRegressor()
	Training time: 0.002s
	Prediction time: 0.017s
	Explained variance: 0.7456339753457355
	Mean absolute error: 25379.263703348657
	R2 score: 0.7446258061416231

GradientBoostingRegressor()
	Training time: 0.158s
	Prediction time: 0.002s
	Explained variance: 0.8831144798210268
	Mean absolute error: 18919.316523457474
	R2 score: 0.8790154524455557

KNeighborsRegressor()
	Training time: 0.001s
	Prediction time: 0.012s
	Explained variance: 0.7456339753457355
	Mean absolute error: 25379.263703348657
	R2 score: 0.7446258061416231

ExtraTreesRegressor()
	Training time: 0.352s
	Prediction time: 0.012s
	Explained variance: 0.9031180342930277
	Mean absolute error: 16219.557399392028
	R2 score: 0.9005268273155099

RandomForestRegressor()
	Training time: 0.539s
	Prediction time: 0.015s
	Explained variance: 0.8868551479248608
	Mean absolute error: 16854.727187884182
	R2 score: 0.8856871244724549

DecisionTreeRegressor()
	Training time: 0.008s
	Prediction time: 0.002s
	Explained

In [None]:
#Hyperparameter tuning for GradientBoostingRegressor()
from sklearn.model_selection import GridSearchCV
parameters = {'learning_rate': [0.01,0.02,0.03,0.04],
                  'subsample'    : [0.9, 0.5, 0.2, 0.1],
                  'n_estimators' : [100,500,1000, 1500],
                  'max_depth'    : [4,6,8,10]
                 }

grid_GBR = GridSearchCV(estimator=GradientBoostingRegressor(), param_grid = parameters, cv = 2, n_jobs=-1)
grid_GBR.fit(X_train, y_train)

print(" Results from Grid Search " )
print("\n The best estimator across ALL searched params:\n",grid_GBR.best_estimator_)
print("\n The best score across ALL searched params:\n",grid_GBR.best_score_)
print("\n The best parameters across ALL searched params:\n",grid_GBR.best_params_)

In [32]:
#Hyperparameter tuning for ExtraTreesRegressor()
param_grid = {
    'n_estimators': [10,50,100],
    'criterion': ['mse', 'mae'],
    'max_depth': [2,8,16,32,50],
    'min_samples_split': [2,4,6],
    'min_samples_leaf': [1,2],
    #'oob_score': [True, False],
    'max_features': ['auto','sqrt','log2'],    
    'bootstrap': [True, False],
    'warm_start': [True, False],
}

from sklearn.ensemble import ExtraTreesRegressor
model = ExtraTreesRegressor ()

gcv = GridSearchCV(model,param_grid,cv=5,n_jobs=-1).fit(X_train,y_train)
print(" Results from Grid Search " )
print("\n The best estimator across ALL searched params:\n",gcv.best_estimator_)
print("\n The best score across ALL searched params:\n",gcv.best_score_)
print("\n The best parameters across ALL searched params:\n",gcv.best_params_)

 Results from Grid Search 

 The best estimator across ALL searched params:
 ExtraTreesRegressor(criterion='mse', max_depth=50, max_features='sqrt',
                    min_samples_split=4, n_estimators=50, warm_start=True)

 The best score across ALL searched params:
 0.8254279245091919

 The best parameters across ALL searched params:
 {'bootstrap': False, 'criterion': 'mse', 'max_depth': 50, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 4, 'n_estimators': 50, 'warm_start': True}


  warn(


In [None]:
#Hyperparameter tuning for RandomForestRegressor()
rfc=RandomForestRegressor(random_state=42)

param_grid = { 
    'n_estimators': [200, 500],
    'max_features': ['sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8],
    'criterion' :['squared_error', 'absolute_error', 'friedman_mse', 'poisson']
}

CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train, y_train)

print(" Results from Grid Search " )
print("\n The best estimator across ALL searched params:\n",CV_rfc.best_estimator_)
print("\n The best score across ALL searched params:\n",CV_rfc.best_score_)
print("\n The best parameters across ALL searched params:\n",CV_rfc.best_params_)

In [None]:
#Hyperparameter tuning for DecisionTreeRegressor()

parameters={"splitter":["best","random"],
            "max_depth" : [1,3,5,7,9,11,12],
           "min_samples_leaf":[1,2,3,4,5,6,7,8,9,10],
           "min_weight_fraction_leaf":[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
           "max_features":["auto","log2","sqrt",None],
           "max_leaf_nodes":[None,10,20,30,40,50,60,70,80,90] }

tuning_model=GridSearchCV(DecisionTreeRegressor(),param_grid=parameters,scoring='neg_mean_squared_error',cv=3,verbose=3)
tuning_model.fit(X_train,y_train)

print(" Results from Grid Search " )
print("\n The best estimator across ALL searched params:\n",tuning_model.best_estimator_)
print("\n The best score across ALL searched params:\n",tuning_model.best_score_)
print("\n The best parameters across ALL searched params:\n",tuning_model.best_params_)

In [34]:
#Checking scores of the hyperparameter tuned models
regressors = [
    ExtraTreesRegressor(criterion='mse', max_depth=50, max_features='sqrt',
                    min_samples_split=4, n_estimators=50, warm_start=True),
    RandomForestRegressor(max_depth=8, max_features='sqrt', n_estimators=200,
                      random_state=42)
]

head = 2
for model in regressors[:head]:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test) 
    print(model)
    print("\tExplained variance:", explained_variance_score(y_test, y_pred))
    print("\tMean absolute error:", mean_absolute_error(y_test, y_pred))
    print("\tR2 score:", r2_score(y_test, y_pred))
    print(cross_val_score(model,X,y,cv=cv))
    print()

  warn(


ExtraTreesRegressor(criterion='mse', max_depth=50, max_features='sqrt',
                    min_samples_split=4, n_estimators=50, warm_start=True)
	Training time: 0.082s
	Prediction time: 0.007s
	Explained variance: 0.8919367898716148
	Mean absolute error: 16665.30493547048
	R2 score: 0.8890409259834438

RandomForestRegressor(max_depth=8, max_features='sqrt', n_estimators=200,
                      random_state=42)
	Training time: 0.300s
	Prediction time: 0.016s
	Explained variance: 0.8486892426386257
	Mean absolute error: 19978.59634288556
	R2 score: 0.8450947501712552



In [22]:
#Final model and score
from sklearn.ensemble import ExtraTreesRegressor
best_model = ExtraTreesRegressor(criterion='mse', max_depth=50, max_features='sqrt',
                    min_samples_split=4, n_estimators=50, warm_start=True)

best_model.fit(X_train,y_train)
best_model.score(X_test,y_test)

  warn(


0.8815112581250318

In [25]:
#Saving the model
import pickle
with open('laptop_price_regression.pickle','wb') as f:
    pickle.dump(best_model,f)

In [24]:
#Saving Column names
import json
columns={
    'data_columns': [col.lower() for col in X.columns]
}

with open('columns.json','w') as f:
    f.write(json.dumps(columns))

In [23]:
#Storing LabelEncoder for GUI use
filehandler = open("le.obj","wb")
pickle.dump(le,filehandler)
filehandler.close()

In [38]:
#Saving unique column values for GUI drop down fields
dict = {}

for (columnName, columnData) in df.iteritems():
    #print('Column Name : ', columnName)
    #print('Column Contents : ', columnData.unique())
    dict[columnName] = columnData.unique().tolist()

with open("column_values.json", "w") as outfile:
    json.dump(dict, outfile)


Column Name :  Manufacturer
Column Name :  Model Name
Column Name :  Model Name Cleaned
Column Name :  Category
Column Name :  Screen Size
Column Name :  Screen
Column Name :  Touchscreen
Column Name :  Screen_HD
Column Name :  Screen Quality
Column Name :  CPU
Column Name :  CPU Brand
Column Name :  CPU Model
Column Name :  CPU_Speed
Column Name :  RAM
Column Name :  Storage
Column Name :  Total Storage in TB
Column Name :  GPU
Column Name :  GPU Brand
Column Name :  GPU Model
Column Name :  Operating System
Column Name :  Weight
Column Name :  Weight_LBS
Column Name :  Price
Column Name :  Price_USD


  for (columnName, columnData) in df.iteritems():
