<a href="https://colab.research.google.com/github/ishancoderr/3D_modeling/blob/main/athletics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### ***Read the athletes CSV data file***

In [7]:
# Load the athletes dataset from the Colab sample-data folder into a DataFrame.
athletes_data = pd.read_csv("/content/sample_data/new_athletes.csv")

In [8]:
# Show the loaded DataFrame as this cell's rich output.
athletes_data

Unnamed: 0,id,name,nationality,sex,height,weight,sport,medal
0,736041664,A Jesus Garcia,ESP,male,1.72,64,athletics,0
1,532037425,A Lam Shin,KOR,female,1.68,56,fencing,0
2,435962603,Aaron Brown,CAN,male,1.98,79,athletics,0
3,521041435,Aaron Cook,MDA,male,1.83,80,taekwondo,0
4,33922579,Aaron Gate,NZL,male,1.81,71,cycling,0
...,...,...,...,...,...,...,...,...
10853,265605954,Zurian Hechavarria,CUB,female,1.64,58,athletics,0
10854,214461847,Zuzana Hejnova,CZE,female,1.73,63,athletics,0
10855,88361042,di Xiao,CHN,male,1.85,100,wrestling,0
10856,900065925,le Quoc Toan Tran,VIE,male,1.60,56,weightlifting,0


In [9]:
# Collect the DataFrame's column labels into a plain Python list and show them.
column_names = athletes_data.columns.tolist()
print(column_names)

['id', 'name', 'nationality', 'sex', 'height', 'weight', 'sport', 'medal']


In [10]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

In [11]:
# Collect the DataFrame's column labels into a list and print them.
# NOTE(review): this cell is identical to the column listing run earlier in
# this notebook and looks redundant - confirm before removing.
column_names= athletes_data.columns.values.tolist()
print(column_names)

['id', 'name', 'nationality', 'sex', 'height', 'weight', 'sport', 'medal']


In [12]:
# Integer-encode each categorical column into a new "<column>_l" column.
#
# Every fitted encoder is kept in `label_encoders`, keyed by source column, so
# a code can be mapped back to its label (inverse_transform) and new rows can
# be encoded with the same mapping (transform). Previously one variable was
# reused, so the nationality and sport encoders were lost after fitting.
#
# NOTE(review): these integer codes are later used directly as model features;
# confirm that treating nominal categories as ordered numbers is intended.
label_encoders = {}
for column in ('nationality', 'sport', 'sex'):
    labelencode = LabelEncoder()
    athletes_data[column + '_l'] = labelencode.fit_transform(athletes_data[column])
    label_encoders[column] = labelencode
# After the loop `labelencode` still refers to the 'sex' encoder, as before.


In [13]:
# Show the DataFrame again, now including the added "*_l" encoded columns.
athletes_data



Unnamed: 0,id,name,nationality,sex,height,weight,sport,medal,nationality_l,sport_l,sex_l
0,736041664,A Jesus Garcia,ESP,male,1.72,64,athletics,0,59,2,1
1,532037425,A Lam Shin,KOR,female,1.68,56,fencing,0,101,8,0
2,435962603,Aaron Brown,CAN,male,1.98,79,athletics,0,33,2,1
3,521041435,Aaron Cook,MDA,male,1.83,80,taekwondo,0,117,21,1
4,33922579,Aaron Gate,NZL,male,1.81,71,cycling,0,138,6,1
...,...,...,...,...,...,...,...,...,...,...,...
10853,265605954,Zurian Hechavarria,CUB,female,1.64,58,athletics,0,48,2,0
10854,214461847,Zuzana Hejnova,CZE,female,1.73,63,athletics,0,50,2,0
10855,88361042,di Xiao,CHN,male,1.85,100,wrestling,0,38,26,1
10856,900065925,le Quoc Toan Tran,VIE,male,1.60,56,weightlifting,0,195,25,1


In [14]:
# Keep only the encoded/numeric inputs plus the 'medal' column, which is
# placed last in the selection.
new_athletes_data = athletes_data[
    ['nationality_l', 'sport_l', 'sex_l', 'height', 'weight', 'medal']
]

In [15]:
# Show the reduced DataFrame as this cell's rich output.
new_athletes_data

Unnamed: 0,nationality_l,sport_l,sex_l,height,weight,medal
0,59,2,1,1.72,64,0
1,101,8,0,1.68,56,0
2,33,2,1,1.98,79,0
3,117,21,1,1.83,80,0
4,138,6,1,1.81,71,0
...,...,...,...,...,...,...
10853,48,2,0,1.64,58,0
10854,50,2,0,1.73,63,0
10855,38,26,1,1.85,100,0
10856,195,25,1,1.60,56,0


# **Splitting the dataset into training and test sets**

In [16]:
# Feature matrix: every column except the last one, as a NumPy array.
x = new_athletes_data.iloc[:, :-1].to_numpy()
# Target vector: the last column, as a NumPy array.
y = new_athletes_data.iloc[:, -1].to_numpy()

In [17]:
# Print the feature matrix, then the target vector, one after the other.
print(x, y, sep="\n")

[[ 59.     2.     1.     1.72  64.  ]
 [101.     8.     0.     1.68  56.  ]
 [ 33.     2.     1.     1.98  79.  ]
 ...
 [ 38.    26.     1.     1.85 100.  ]
 [195.    25.     1.     1.6   56.  ]
 [155.     2.     1.     1.85  70.  ]]
[0 0 0 ... 0 0 0]


In [18]:
from sklearn.model_selection import train_test_split

# Split features and target into 75% training / 25% test rows;
# random_state=0 makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.75,
    test_size=0.25,
    random_state=0,
)

In [19]:
# Print the training features, then the training targets.
print(x_train, y_train, sep="\n")

[[  6.    18.     1.     1.87  97.  ]
 [ 71.     2.     1.     1.8   78.  ]
 [105.    18.     0.     1.64  60.  ]
 ...
 [ 26.    14.     1.     1.8   90.  ]
 [ 38.     6.     1.     1.72  65.  ]
 [ 73.    11.     0.     1.7   55.  ]]
[0 0 0 ... 0 0 0]


In [20]:
# Print the test features, then the test targets.
print(x_test, y_test, sep="\n")

[[ 71.     2.     1.     1.89  78.  ]
 [ 48.     2.     0.     1.71  60.  ]
 [135.    14.     1.     1.8   73.  ]
 ...
 [ 67.    13.     1.     1.7   73.  ]
 [ 80.    10.     0.     1.61  52.  ]
 [173.     5.     1.     1.89  90.  ]]
[0 0 0 ... 0 0 1]


# **Feature Scaling**

In [21]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same fitted
# scaler to both the training and the test features.
sc = StandardScaler()
sc.fit(x_train)
X_train = sc.transform(x_train)
X_test = sc.transform(x_test)

In [22]:
# Print the scaled training features, then the scaled test features.
print(X_train, X_test, sep="\n")

[[-1.49425506  1.01735905  0.91845467  0.89843998  1.52952314]
 [-0.37059005 -0.93667102  0.91845467  0.27877784  0.35860166]
 [ 0.21717319  1.01735905 -1.08878536 -1.13759275 -0.75069238]
 ...
 [-1.14851198  0.52885153  0.91845467  0.27877784  1.09813102]
 [-0.94106613 -0.4481635   0.91845467 -0.42940745 -0.44255515]
 [-0.33601574  0.16247089 -1.08878536 -0.60645378 -1.05882961]]
[[-0.37059005 -0.93667102  0.91845467  1.0754863   0.35860166]
 [-0.76819459 -0.93667102 -1.08878536 -0.51793062 -0.75069238]
 [ 0.73578781  0.52885153  0.91845467  0.27877784  0.05046443]
 ...
 [-0.43973866  0.40672465  0.91845467 -0.60645378  0.05046443]
 [-0.21500566  0.04034401 -1.08878536 -1.40316224 -1.24371195]
 [ 1.39269966 -0.57029038  0.91845467  1.0754863   1.09813102]]


# **Train the logistic regression model on the training set**

In [23]:
from sklearn.linear_model import LogisticRegression

# Build the classifier with a fixed random_state, then fit it on the scaled
# training features and the training targets.
Classifier = LogisticRegression(random_state=0)
Classifier.fit(X=X_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

# Predicting a new result

In [24]:
# One hand-written sample, in the feature column order used for training:
# nationality_l, sport_l, sex_l, height, weight.
new_athlete = [[59, 2, 1, 1.71, 64]]
# Scale the sample with the fitted scaler before asking the model to predict.
print(Classifier.predict(sc.transform(new_athlete)))

[0]
