In [6]:
import pandas as pd

# Read the two car tables with the first row as the header: `data_source`
# from the "euro" file and `data_target` from the "japan" file.
data_source, data_target = (
    pd.read_csv(csv_path, header=0)
    for csv_path in ('./car-euro-edited.csv', './car-japan-edited.csv')
)

In [None]:
import os

import numpy as np
import pandas as pd
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from alexnet_pytorch import AlexNet

# Use the 'file_system' strategy for sharing tensors between processes.
# NOTE(review): presumably set for the DataLoader worker process used below
# (num_workers=1), e.g. to stay clear of open-file-descriptor limits — confirm.
torch.multiprocessing.set_sharing_strategy('file_system')

# Load AlexNet via from_pretrained(); below it is only called through
# extract_features(), i.e. as a fixed feature extractor.
model = AlexNet.from_pretrained('alexnet')

def transform(img):
    """Prepare one dataset image for the AlexNet feature extractor.

    The image is resized to 224x224, converted to RGB mode and then turned
    into a tensor with torchvision's ToTensor.

    Args:
        img: image object exposing ``resize`` and ``convert``
            (presumably a PIL image as yielded by the dataset — confirm).

    Returns:
        The tensor produced by ``ToTensor()`` for the resized RGB image.
    """
    to_tensor = ToTensor()
    rgb_img = img.resize((224, 224)).convert('RGB')
    return to_tensor(rgb_img)

# MNIST training split rooted at ./data; each image goes through transform()
# before it is handed to the loader.
train_data = datasets.MNIST(root='data', train=True, transform=transform)

# Shuffled mini-batches of 128 images, produced by one worker process.
data_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=128,
    shuffle=True,
    num_workers=1,
)

# Extract AlexNet features for every MNIST training image and append them,
# together with the digit label, to a CSV file (one row per image:
# 256 * 6 * 6 feature columns followed by a 'digit' column).
output_path = 'data/preprocessed_train.csv'

# Start from a clean file so re-running the cell does not append duplicates.
# The removal is guarded: an unconditional os.remove() raises
# FileNotFoundError on the very first run, when the CSV does not exist yet.
if os.path.exists(output_path):
    os.remove(output_path)

count = 0
# Inference only: no_grad() avoids building autograd graphs that would be
# thrown away immediately.
with torch.no_grad():
    for batch, digit in data_loader:
        # The loader already yields an (N, 3, 224, 224) batch, so a single
        # forward pass replaces N separate single-image passes.
        features = model.extract_features(batch)
        # Flatten each (256, 6, 6) feature map into one row.
        data = features.detach().numpy().reshape((len(batch), 256 * 6 * 6))

        dtf = pd.DataFrame(data=data)
        label_dtf = pd.DataFrame(data=digit.numpy(), columns=['digit'])
        full = pd.concat([dtf, label_dtf], axis=1)
        print('label', full.digit.unique())
        # Write the header only with the first batch, i.e. while the file
        # does not exist yet; later batches are appended without one.
        full.to_csv(output_path, mode='a',
                    header=not os.path.exists(output_path), index=False)

        count += 1
        print(count)

In [None]:
import pandas as pd
import xgboost as xgb
import joblib


# Training table: feature columns plus a 'digit' label column.
dtf = pd.read_csv('data/preprocessed_10000.csv')
# keep only feature columns (everything that is not the label)
features = dtf.columns[dtf.columns != 'digit']

# Train using features and labels; fit() hands back the fitted estimator.
# NOTE: this rebinds `model`, replacing the AlexNet object created earlier.
model = xgb.XGBClassifier().fit(dtf[features], dtf['digit'])

# store model for further use
joblib.dump(model, 'data/model.pkl')

# Evaluate model on the test file, using the same feature columns
dtf_test = pd.read_csv('data/preprocessed_test.csv')
preds = model.predict(dtf_test[features])

# First 20 true labels, then the first 20 predictions
print(dtf_test['digit'][:20].tolist())
# > [1, 7, 8, 6, 7, 1, 1, 2, 7, 7, 1, 6, 0, 7, 5, 2, 2, 0, 7, 3]
print(list(preds[:20]))
# > [1, 7, 8, 6, 7, 1, 1, 2, 7, 7, 1, 6, 0, 7, 5, 2, 2, 0, 7, 3]

Try using XGBoost: train a classifier on the source (Euro) car data and evaluate it on the target (Japan) car data.

In [7]:
# Model inputs: two columns of the source frame, kept as a 2-D DataFrame.
features = data_source[['Weight', 'Horsepower']]
# Labels as a 1-D Series (single brackets). Selecting [['Selected']] instead
# gives an (n, 1) DataFrame, and passing that column vector as `y` to fit()
# triggers a "column-vector y was passed when a 1d array was expected" warning.
labels = data_source['Selected']

In [8]:
import pandas as pd
import xgboost as xgb
import joblib

# Train using features and labels; fit() hands back the fitted estimator
model = xgb.XGBClassifier().fit(features, labels)

# store model for further use (the list of written files is the cell output)
joblib.dump(model, 'testmodel.pkl')



  return f(*args, **kwargs)


['testmodel.pkl']

In [9]:
# Evaluate model on the target frame, using the 'Weight' and 'Horsepower'
# columns as inputs
target_feature = data_target.loc[:, ['Weight', 'Horsepower']]
preds = model.predict(target_feature)

# Predictions first, the target frame's 'Selected' labels second
print(list(preds))
print(data_target['Selected'].tolist())

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [10]:
# Display the target frame to inspect the rows behind the printed predictions.
data_target

Unnamed: 0,Model,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Year,Origin,Selected
0,toyota corona,31.0,4,76,52,1649,16.5,1974,Japan,0
1,mazda glc deluxe,32.8,4,78,52,1985,19.4,1978,Japan,0
2,honda civic cvcc,33.0,4,91,53,1795,17.5,1975,Japan,0
3,honda civic,33.0,4,91,53,1795,17.4,1976,Japan,0
4,toyota starlet,39.1,4,79,58,1755,16.9,1981,Japan,0
5,toyota corolla tercel,38.1,4,89,60,1968,18.8,1980,Japan,0
6,honda civic 1300,35.1,4,81,60,1760,16.1,1981,Japan,0
7,datsun 710,32.0,4,83,61,2003,19.0,1974,Japan,0
8,toyota tercel,37.7,4,89,62,2050,17.3,1981,Japan,0
9,toyota corolla 1200,31.0,4,71,65,1773,19.0,1971,Japan,0
