In [15]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import pickle

In [2]:
# Load the two raw measurement tables (one CSV per sex) from the local data folder.
female, male = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/female.csv', './data/male.csv')
)

In [4]:
# Keep only the three measurements used downstream. `.copy()` detaches the
# result from the raw frames so later column assignments never touch them.
measurement_columns = ['chestcircumference', 'weightkg', 'stature']
new_female = female.loc[:, measurement_columns].copy()
new_male = male.loc[:, measurement_columns].copy()

In [5]:
# Sanity check: only the three selected measurement columns should remain.
new_female.head()

Unnamed: 0,chestcircumference,weightkg,stature
0,922,657,1560
1,839,534,1665
2,874,663,1711
3,1008,782,1660
4,1089,886,1572


In [6]:
# Stack the female rows on top of the male rows. ignore_index=True discards
# the per-file row labels and renumbers the result from 0.
frames_to_stack = (new_female, new_male)
combined = pd.concat(frames_to_stack, axis=0, ignore_index=True)

In [7]:
# Sanity check: the stacked frame keeps the same three columns and starts at index 0.
combined.head()

Unnamed: 0,chestcircumference,weightkg,stature
0,922,657,1560
1,839,534,1665
2,874,663,1711
3,1008,782,1660
4,1089,886,1572


In [8]:
# Row count should equal len(new_female) + len(new_male); still three columns.
combined.shape

(6066, 3)

In [9]:
# Derive a t-shirt size label (and a display colour per size) from chest
# circumference, then attach both to `combined` as new columns.
#
# Each SIZE_CHART entry is (exclusive upper bound, size label, colour), in
# ascending order of bound. Bounds are compared against chestcircumference / 10
# (presumably mm -> cm; confirm against the dataset documentation). A value
# exactly equal to a bound belongs to the next larger size, and the last entry
# has no upper bound. Label spellings (underscore vs. hyphen) are kept exactly
# as they were because they become the classifier's class names.
SIZE_CHART = [
    (84, "XX_Small", "pink"),
    (90, "X_Small", "yellow"),
    (95, "Small", "red"),
    (102, "Medium", "blue"),
    (112, "Large", "lawngreen"),
    (123, "X-Large", "green"),
    (133, "XX-Large", "slategray"),
    (float("inf"), "XXX-Large", "black"),
]
size_labels = [label for _, label, _ in SIZE_CHART]
size_colors = {label: shade for _, label, shade in SIZE_CHART}

chest = combined['chestcircumference'] / 10

# A missing measurement compares False against every bound, so a row-by-row
# if/elif chain would silently label it "XXX-Large". Fail loudly instead.
assert chest.notna().all(), "chestcircumference contains missing values"

# Vectorised binning replaces the per-row iterrows() loop. right=False makes
# every bin [lower, upper), which matches the strict `<` comparisons above.
size_bins = [float("-inf")] + [upper for upper, _, _ in SIZE_CHART]
sizes = (
    pd.cut(chest, bins=size_bins, labels=size_labels, right=False)
    .astype(object)  # plain strings rather than a categorical dtype
    .tolist()
)
colors = [size_colors[label] for label in sizes]

combined['t-shirt'] = sizes
combined['color'] = colors

In [10]:
# Verify that the derived 't-shirt' and 'color' columns were attached.
combined.head()

Unnamed: 0,chestcircumference,weightkg,stature,t-shirt,color
0,922,657,1560,Small,red
1,839,534,1665,XX_Small,pink
2,874,663,1711,X_Small,yellow
3,1008,782,1660,Medium,blue
4,1089,886,1572,Large,lawngreen


In [11]:
# Same row count as before labelling, with two additional columns.
combined.shape

(6066, 5)

In [14]:
# Persist the labelled table; index=False keeps the row index out of the file.
combined.to_csv('./data/combined.csv', index=False)

### Create KNN Model

In [16]:
# Number of nearest training samples that vote on each prediction.
neighbor_count = 5
knn_model = KNeighborsClassifier(n_neighbors=neighbor_count)

In [17]:
# Predict the size label from weight and stature only (chest circumference,
# which the label was derived from, is deliberately not a feature here).
# Hold out 20% of the rows; the fixed seed makes the split reproducible.
feature_frame = combined[['weightkg', 'stature']]
label_series = combined['t-shirt']
X_train, X_test, y_train, y_test = train_test_split(
    feature_frame,
    label_series,
    test_size=0.2,
    random_state=42,
)

In [21]:
# Peek at the training labels; the index keeps the original row labels from `combined`.
y_train.head()

109       Large
4248    X-Large
5385      Large
5087    X-Large
2828     Medium
Name: t-shirt, dtype: object

In [22]:
# Fit on the training split, passing the labels as a plain NumPy array.
train_labels = y_train.to_numpy()
knn_model.fit(X_train, train_labels)

### Test Model (single-sample sanity check)

In [27]:
# Hand-made query point. Both values are multiplied by 10 so they sit on the
# same scale as the dataset columns (e.g. weightkg 657, stature 1560 above).
weight, height = 56 * 10, 162 * 10

In [28]:
# Wrap the query point in a one-row frame whose column names and order match
# the features the classifier was fitted on.
df = pd.DataFrame({'weightkg': [weight], 'stature': [height]})
df.head()

Unnamed: 0,weightkg,stature
0,560,1620


In [29]:
# predict() returns one label per input row; show the label for the first row.
predicted_labels = knn_model.predict(df)
predicted_labels[0]

'XX_Small'

### Save Model

In [30]:
# Serialise the fitted classifier to disk. The context manager guarantees the
# file is flushed and closed even if pickling raises; a bare open() passed
# straight to pickle.dump leaves the handle open until garbage collection.
# NOTE: pickle files can execute arbitrary code on load, so only unpickle
# 'knn_model.pkl' from a trusted source.
with open('knn_model.pkl', 'wb') as model_file:
    pickle.dump(knn_model, model_file)