-
Notifications
You must be signed in to change notification settings - Fork 0
/
preload_data.py
106 lines (91 loc) · 3.27 KB
/
preload_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from sklearn import svm, metrics, tree, neural_network
import numpy as np
import csv
def _read_text_table(path):
    """Read a comma-separated file into a NumPy array of strings."""
    return np.loadtxt(path, delimiter=',', dtype='str')


# Lookup tables first, then the two unlabeled prediction tables.
brands = _read_text_table('edited_data/brands.csv')
device_classes = _read_text_table('edited_data/classes.csv')
prediction_data_classification = _read_text_table('initial_data/test_classification_unlabeled.csv')
prediction_data_regression = _read_text_table('initial_data/test_regression_unlabeled.csv')

# Discard the first line of each prediction table.
prediction_data_classification = prediction_data_classification[1:]
prediction_data_regression = prediction_data_regression[1:]

# Only the first column of each lookup table is used.
brands = brands[:, 0]
device_classes = device_classes[:, 0]

# Discard the leading id column of each prediction table.
prediction_data_classification = prediction_data_classification[:, 1:]
prediction_data_regression = prediction_data_regression[:, 1:]
# Replace the nominal brand value (column 4) with a numeric code.
def _encode_brand_column(table, known_brands, column=4):
    """Overwrite the brand name in *column* of every row with a numeric code.

    The code is the 1-based position of the first entry of *known_brands*
    equal to the cell; a name that is not listed is encoded as 0.  Rows are
    modified in place.
    """
    for row in table:
        matches = np.where(known_brands == row[column])[0]
        # An empty match array means "unknown brand"; test for it explicitly
        # instead of relying on a bare ``except`` to catch the IndexError.
        row[column] = (matches[0] + 1) if matches.size else 0


_encode_brand_column(prediction_data_classification, brands)
_encode_brand_column(prediction_data_regression, brands)
# Fallback values written into missing or implausible cells (the original
# comment says only that they are "values from previous test").
ram_average = 4.85
size_average = 9.82
rom_average = 131.61
weight_average = 1016.32

# Values above these bounds are treated as outliers and replaced.
_RAM_UPPER_BOUND = 32
_WEIGHT_UPPER_BOUND = 5500


def _with_room_for_averages(table):
    """Return *table* with a string dtype wide enough for every fallback value.

    NumPy fixed-width string arrays silently truncate longer values on
    assignment, so a table whose widest cell is shorter than e.g. '1016.32'
    would otherwise store a corrupted average.  Tables that are already wide
    enough, or that are not fixed-width unicode, are returned unchanged.
    """
    if table.dtype.kind != 'U':
        return table
    needed = max(len(str(value)) for value in
                 (ram_average, size_average, rom_average, weight_average))
    current = table.dtype.itemsize // np.dtype('U1').itemsize
    if current >= needed:
        return table
    return table.astype('U{}'.format(needed))


def _fill_missing_and_outliers(table):
    """Clean columns 0-3 (RAM, size, ROM, weight) of every row.

    * RAM: empty or greater than ``_RAM_UPPER_BOUND`` -> ``ram_average``.
    * size: empty -> ``size_average`` (no range check).
    * ROM: empty, or not greater than the row's RAM -> ``rom_average``.
    * weight: empty or greater than ``_WEIGHT_UPPER_BOUND`` -> ``weight_average``.

    Returns the cleaned table, which is a new array only if the dtype had to
    be widened; otherwise the rows are modified in place.

    Raises:
        ValueError: if a non-empty RAM, ROM or weight cell is not numeric.
    """
    table = _with_room_for_averages(table)
    for row in table:
        # ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
        if row[0] == '' or float(row[0]) > _RAM_UPPER_BOUND:
            row[0] = ram_average

        if row[1] == '':
            row[1] = size_average

        if row[2] == '':
            row[2] = rom_average
        # NOTE(review): the original wrote rom_average into column 3 (weight)
        # here, which looks like a copy-paste slip; the ROM cell is the one
        # that failed the check, so it is the one replaced.
        if float(row[2]) <= float(row[0]):
            row[2] = rom_average

        if row[3] == '' or float(row[3]) > _WEIGHT_UPPER_BOUND:
            row[3] = weight_average
    return table


prediction_data_classification = _fill_missing_and_outliers(prediction_data_classification)
prediction_data_regression = _fill_missing_and_outliers(prediction_data_regression)
# Echo the known device class names to stdout (kept from the original script).
print(device_classes)

# Replace the device class name (column 5 of the regression table) with its
# 1-based position in device_classes; names that are not listed become 0.
for row in prediction_data_regression:
    matches = np.where(device_classes == row[5])[0]
    # Explicit empty-match check instead of a bare ``except`` around indexing.
    row[5] = (matches[0] + 1) if matches.size else 0
# Write each processed table to its output file, one CSV record per row.
for destination, table in (
    ('edited_data/classification_unlabeled_edited.csv', prediction_data_classification),
    ('edited_data/regression_unlabeled_edited.csv', prediction_data_regression),
):
    with open(destination, "w", newline='') as out_file:
        csv.writer(out_file, delimiter=',').writerows(table)