In [1]:
# import libraries
import pandas as pd
import operator
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
import seaborn as sns

#3.2.1 Prepare the Data

In [2]:
# read in the dataset locally
chip_dataset = pd.read_csv('/content/chip_dataset.csv')

# drop the first column which list all the indices
chip_dataset.drop('Unnamed: 0', inplace=True, axis=1)

chip_dataset

Unnamed: 0,Product,Type,Release Date,Process Size (nm),TDP (W),Die Size (mm^2),Transistors (million),Freq (MHz),Foundry,Vendor,FP16 GFLOPS,FP32 GFLOPS,FP64 GFLOPS
0,AMD Athlon 64 3500+,CPU,2007-02-20,65.0,45.0,77.0,122.0,2200.0,Unknown,AMD,,,
1,AMD Athlon 200GE,CPU,2018-09-06,14.0,35.0,192.0,4800.0,3200.0,Unknown,AMD,,,
2,Intel Core i5-1145G7,CPU,2020-09-02,10.0,28.0,,,2600.0,Intel,Intel,,,
3,Intel Xeon E5-2603 v2,CPU,2013-09-01,22.0,80.0,160.0,1400.0,1800.0,Intel,Intel,,,
4,AMD Phenom II X4 980 BE,CPU,2011-05-03,45.0,125.0,258.0,758.0,3700.0,Unknown,AMD,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4849,NVIDIA Quadro 3000M,GPU,2011-02-22,40.0,75.0,332.0,1950.0,450.0,TSMC,NVIDIA,,432.0,36.0
4850,Intel GMA 950,GPU,2005-06-01,90.0,7.0,,,250.0,Intel,Intel,,,
4851,NVIDIA GeForce GT 320M,GPU,2010-03-03,40.0,23.0,100.0,486.0,500.0,TSMC,NVIDIA,,52.8,
4852,NVIDIA GeForce FX 5200,GPU,2003-03-06,150.0,,65.0,29.0,250.0,TSMC,NVIDIA,,,


In [3]:
# transform categorical feature 'Release Date' to numerical value
for i in range(len(chip_dataset['Release Date'])):
  if 'Na' in chip_dataset.iat[i, 2] :
    chip_dataset.iat[i, 2] = np.NaN
  else:
    year = int(chip_dataset.iat[i, 2][0:4])-2000
    month = int(chip_dataset.iat[i, 2][5:7])/12
    day = int(chip_dataset.iat[i, 2][8:10])/365
    chip_dataset.iat[i, 2] = year+ month + day

In [5]:
# split the data to CPU and GPU subsets
grouped = chip_dataset.groupby(chip_dataset['Type'])

# spilt and get the CPU dataset
CPU = grouped.get_group('CPU')
del CPU['FP16 GFLOPS']
del CPU['FP32 GFLOPS']
del CPU['FP64 GFLOPS']

# split and get the GPU dataset
GPU = grouped.get_group('GPU')

In [7]:
# data normalization for CPU dataset
from sklearn.preprocessing import MinMaxScaler
CPU_normal = CPU.copy().drop(columns=['Product','Type','Foundry','Vendor'])
scaler = MinMaxScaler()
scaler.fit(CPU_normal)
fit = scaler.fit_transform(CPU_normal)
# fit = pd.DataFrame(fit, columns = CPU_normal.columns)
final = pd.DataFrame(data = fit, columns = CPU_normal.columns, index = CPU_normal.index)
CPU_normal = CPU.copy().drop(columns=CPU_normal.columns).join(final)
CPU_normal

Unnamed: 0,Product,Type,Foundry,Vendor,Release Date,Process Size (nm),TDP (W),Die Size (mm^2),Transistors (million),Freq (MHz)
0,AMD Athlon 64 3500+,CPU,Unknown,AMD,0.321634,0.335260,0.110276,0.111274,0.004436,0.390244
1,AMD Athlon 200GE,CPU,Unknown,AMD,0.875211,0.040462,0.085213,0.279649,0.248552,0.634146
2,Intel Core i5-1145G7,CPU,Intel,Intel,0.970584,0.017341,0.067669,,,0.487805
3,Intel Xeon E5-2603 v2,CPU,Intel,Intel,0.634806,0.086705,0.197995,0.232796,0.071127,0.292683
4,AMD Phenom II X4 980 BE,CPU,Unknown,AMD,0.523187,0.219653,0.310777,0.376281,0.037625,0.756098
...,...,...,...,...,...,...,...,...,...,...
2187,Intel Xeon Gold 6312U,CPU,Intel,Intel,0.999080,0.017341,0.461153,,,0.439024
2188,AMD Ryzen 9 4900H,CPU,TSMC,AMD,0.948449,0.000000,0.132832,0.226940,0.509471,0.658537
2189,Intel Core 2 Duo E6550,CPU,Intel,Intel,0.339117,0.335260,0.160401,0.207906,0.013255,0.422683
2190,AMD Opteron 246,CPU,Unknown,AMD,0.215248,0.479769,0.210526,,0.003601,0.341463


In [8]:
#data normalization for GPU dataset
from sklearn.preprocessing import MinMaxScaler
GPU_normal = GPU.copy().drop(columns=['Product','Type','Foundry','Vendor'])
scaler = MinMaxScaler()
scaler.fit(GPU_normal)
fit = scaler.fit_transform(GPU_normal)

#add back to the dataframe
final = pd.DataFrame(data = fit, columns = GPU_normal.columns, index = GPU_normal.index)
#GPU_normal = GPU.copy().drop(columns=GPU_normal.columns).join(final)
GPU_normal

Unnamed: 0,Release Date,Process Size (nm),TDP (W),Die Size (mm^2),Transistors (million),Freq (MHz),FP16 GFLOPS,FP32 GFLOPS,FP64 GFLOPS
2192,11.341553,32.0,95.0,216.0,1160.0,100.0,,,
2193,2.568493,150.0,,174.0,80.0,200.0,,,
2194,19.22968,14.0,25.0,132.0,3300.0,1469.0,18.38,1177.0,36.77
2195,8.255479,130.0,,,,200.0,,12.8,
2196,8.832192,55.0,20.0,73.0,242.0,600.0,,96.0,
...,...,...,...,...,...,...,...,...,...
4849,11.226941,40.0,75.0,332.0,1950.0,450.0,,432.0,36.00
4850,5.50274,90.0,7.0,,,250.0,,,
4851,10.258219,40.0,23.0,100.0,486.0,500.0,,52.8,
4852,3.266438,150.0,,65.0,29.0,250.0,,,


In [10]:
# encode the categorical features
CPU['Vendor'] = CPU['Vendor'].replace(['AMD', 'Intel'], ['0', '1'])
GPU['Vendor'] = GPU['Vendor'].replace(['AMD', 'Intel', 'Other', 'NVIDIA', 'ATI'], ['0', '1','2','3','4'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CPU['Vendor'] = CPU['Vendor'].replace(['AMD', 'Intel'], ['0', '1'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  GPU['Vendor'] = GPU['Vendor'].replace(['AMD', 'Intel', 'Other', 'NVIDIA', 'ATI'], ['0', '1','2','3','4'])


In [15]:
# drop NaN rows

#Here we will use y= number of transistors and x to be other variable
CPU_normal = CPU_normal.dropna()
#CPU.drop('Transistor')
X = CPU_normal. iloc[:, [4,5,6,7,9]]
y = CPU_normal. iloc[:, [8]]

X.head()

Unnamed: 0,Release Date,Process Size (nm),TDP (W),Die Size (mm^2),Freq (MHz)
0,0.321634,0.33526,0.110276,0.111274,0.390244
1,0.875211,0.040462,0.085213,0.279649,0.634146
3,0.634806,0.086705,0.197995,0.232796,0.292683
4,0.523187,0.219653,0.310777,0.376281,0.756098
5,0.634806,0.086705,0.235589,0.232796,0.439024


#3.2.2 ANN Brand Prediction for CPUs

In [16]:
y = CPU['Vendor']
y


0       0
1       0
2       1
3       1
4       0
       ..
2187    1
2188    0
2189    1
2190    0
2191    0
Name: Vendor, Length: 2192, dtype: object

In [17]:
X=CPU_normal.copy().drop(['Vendor', 'Foundry','Product','Type'],axis=1)
y = CPU_normal['Vendor'].replace(['AMD', 'Intel'], [0, 1])
X.shape
y.shape

(1543,)

In [18]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state=5)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
y_train

(1234, 6) (1234,)
(309, 6) (309,)


1768    0
1317    1
1591    0
1224    0
1310    0
       ..
2009    0
1608    1
1413    1
298     0
1227    1
Name: Vendor, Length: 1234, dtype: int64

In [19]:
import tensorflow
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()

model.add(Dense(units =6, activation = 'relu', input_dim = 6))
model.add(Dense(units = 6, activation = 'relu'))
model.add(Dense(units = 1, activation = 'sigmoid'))

model.compile(loss='binary_crossentropy',optimizer='sgd',metrics=['accuracy'])

In [20]:
model.fit(X_train, y_train, batch_size = 1, epochs = 1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7fe15110a160>

#3.2.3 ANN Brand Prediction for GPUs

In [21]:
y = GPU['Vendor'].replace(['AMD', 'Intel', 'Other', 'NVIDIA', 'ATI'], [0,1,2,3,4])
y

2192    1
2193    2
2194    3
2195    1
2196    4
       ..
4849    3
4850    1
4851    3
4852    3
4853    3
Name: Vendor, Length: 2662, dtype: object

In [22]:
X = GPU.copy().drop(['Vendor', 'Foundry','Product','Type'],axis=1)
X

Unnamed: 0,Release Date,Process Size (nm),TDP (W),Die Size (mm^2),Transistors (million),Freq (MHz),FP16 GFLOPS,FP32 GFLOPS,FP64 GFLOPS
2192,11.341553,32.0,95.0,216.0,1160.0,100.0,,,
2193,2.568493,150.0,,174.0,80.0,200.0,,,
2194,19.22968,14.0,25.0,132.0,3300.0,1469.0,18.38,1177.0,36.77
2195,8.255479,130.0,,,,200.0,,12.8,
2196,8.832192,55.0,20.0,73.0,242.0,600.0,,96.0,
...,...,...,...,...,...,...,...,...,...
4849,11.226941,40.0,75.0,332.0,1950.0,450.0,,432.0,36.00
4850,5.50274,90.0,7.0,,,250.0,,,
4851,10.258219,40.0,23.0,100.0,486.0,500.0,,52.8,
4852,3.266438,150.0,,65.0,29.0,250.0,,,


In [23]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state=5)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
y_train

(2129, 9) (2129,)
(533, 9) (533,)


3858    0
2329    3
4586    3
3768    4
2755    3
       ..
3224    0
4313    3
3616    3
3917    0
4446    3
Name: Vendor, Length: 2129, dtype: object

In [24]:
X_train

Unnamed: 0,Release Date,Process Size (nm),TDP (W),Die Size (mm^2),Transistors (million),Freq (MHz),FP16 GFLOPS,FP32 GFLOPS,FP64 GFLOPS
3858,12.949543,28.0,375.0,352.0,4313.0,825.0,,3405.0,851.20
2329,13.336073,28.0,45.0,118.0,1270.0,810.0,,622.1,25.92
4586,6.338813,90.0,65.0,196.0,278.0,450.0,,,
3768,11.207763,40.0,39.0,104.0,627.0,650.0,,416.0,
2755,18.224201,16.0,100.0,314.0,7200.0,1316.0,125.5,8033.0,251.00
...,...,...,...,...,...,...,...,...,...
3224,11.554795,32.0,65.0,227.0,1178.0,444.0,,284.2,
4313,20.260959,12.0,115.0,445.0,10800.0,900.0,10370.0,5184.0,162.00
3616,2.871689,150.0,,65.0,29.0,250.0,,,
3917,14.796575,28.0,17.0,245.0,2410.0,533.0,,409.3,25.58


In [25]:
import tensorflow
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()

model.add(Dense(units =6, activation = 'relu', input_dim = 9))
model.add(Dense(units = 6, activation = 'relu'))
model.add(Dense(units = 1, activation = 'sigmoid'))

model.compile(loss='binary_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.fit(X_train, y_train, batch_size = 1, epochs = 1000)

ValueError: ignored