

## **Importing Libraries**

In [53]:
from sklearn import datasets
import pandas as pd
from collections import Counter


### **Loading Datasets & Analyzing Attributes**


In [54]:
# Load the wine dataset bundled with scikit-learn. The cells below read its
# feature_names, data, target_names and target attributes.
wine=datasets.load_wine()

#### **Features**

In [55]:
wine.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

#### **Data**

In [56]:
wine.data

array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]])

#### **Target Names**

In [57]:
wine.target_names

array(['class_0', 'class_1', 'class_2'], dtype='<U7')

#### **Building a DataFrame from the Dataset**

In [58]:
# One row per wine sample: the feature columns come first and the class label
# is appended as the last column, named 'target'.
DF=pd.DataFrame(data=wine.data,columns=wine.feature_names).assign(target=wine.target)

### **Actual DataFrame**

In [59]:
DF

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0,2
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0,2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0,2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0,2


#### Checking the Class Distribution of Target Values

In [60]:
DF['target'].value_counts()

1    71
0    59
2    48
Name: target, dtype: int64

### Splitting the DataFrame into Train-Test Split

In [61]:
def trainTestSplitDF(dataframe,train_percent,random_state=None):
  """Randomly split a DataFrame into a train part and a test part.

  Args:
    dataframe: pandas DataFrame to split. Test rows are selected by index
      label, so the index is assumed to be unique; with duplicated labels a
      row sharing a label with a sampled row is also left out of the test part.
    train_percent: share of the rows (0-100) to place in the train part. The
      row count is rounded to the nearest integer.
    random_state: optional seed forwarded to ``DataFrame.sample``. Pass a fixed
      value to get the same split on every run; the default ``None`` keeps the
      previous unseeded behaviour.

  Returns:
    Tuple ``(train_df, test_df)``: the sampled rows and the remaining rows.

  Raises:
    ValueError: if ``train_percent`` is outside the range 0-100.
  """
  if not 0<=train_percent<=100:
    raise ValueError('train_percent must be between 0 and 100, got %r'%(train_percent,))
  n_train=round((train_percent/100)*dataframe.shape[0])
  train_df=dataframe.sample(n=n_train,random_state=random_state)
  # Everything whose index label was not sampled becomes the test set.
  test_df=dataframe[~dataframe.index.isin(train_df.index)]
  return train_df,test_df

In [62]:
# Randomly sample 80% of the rows into TRAIN_DF; the remaining rows form TEST_DF.
TRAIN_DF,TEST_DF=trainTestSplitDF(DF,80)

In [63]:
TRAIN_DF.shape

(142, 14)

In [64]:
TEST_DF.shape

(36, 14)

### **Frame To List**

In [65]:
def frameToList(dataframe):
  """Return the DataFrame's contents as a list of row lists (one inner list per row)."""
  row_matrix=dataframe.values
  return row_matrix.tolist()

#### Checking the Class Distribution of Train and Test Targets

In [66]:
TRAIN_DF['target'].value_counts()

1    54
0    46
2    42
Name: target, dtype: int64

In [67]:
TEST_DF['target'].value_counts()

1    17
0    13
2     6
Name: target, dtype: int64

In [68]:
TRAIN_LIST=frameToList(TRAIN_DF)

In [69]:
TEST_LIST=frameToList(TEST_DF)

## **Euclidean Distance**

#### **For a Single Element**

In [70]:
# First row of the test split; its last element is the class label ('target').
SAMPLE=TEST_LIST[0]
print(SAMPLE)

[14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 3.45, 1480.0, 0.0]


In [71]:
SAMPLE[len(SAMPLE)-1]

0.0

In [72]:
# NOTE: despite its name, TEST_SAMPLE holds the first row of the *training*
# split (TRAIN_LIST); SAMPLE is the row taken from the test split.
TEST_SAMPLE=TRAIN_LIST[0]
print(TEST_SAMPLE)

[12.45, 3.03, 2.64, 27.0, 97.0, 1.9, 0.58, 0.63, 1.14, 7.5, 0.67, 1.73, 880.0, 2.0]


### Euclidean Distance Between One Training Point and One Test Point

In [73]:
# Euclidean distance between SAMPLE and TEST_SAMPLE, computed over every
# element except the last one (the class label) and rounded to two decimals.
squared_sum=0
feature_count=len(SAMPLE)-1
for idx in range(feature_count):
  diff=SAMPLE[idx]-TEST_SAMPLE[idx]
  squared_sum+=diff**2
distance=round(squared_sum**0.5,2)
print(distance)

600.32


In [74]:

# Euclidean distance between every test row and every training row.
# ALL_DISTANCES[t][r] is the distance (rounded to two decimals) between
# TEST_LIST[t] and TRAIN_LIST[r]; the last element of each row (the class
# label) is excluded from the calculation.
ALL_DISTANCES=[]
for TEST in TEST_LIST:
  row_distances=[]
  for TRAIN in TRAIN_LIST:
    # Reset for every (test, train) pair; resetting only once per test row
    # would carry the previous pair's distance into the next one.
    distance=0
    for i in range(len(TRAIN)-1):
      distance+=(TRAIN[i]-TEST[i])**2
    distance=round(((distance)**(1/2)),2)
    row_distances.append(distance)
  ALL_DISTANCES.append(row_distances)


# **PREDICTING ONE VALUE**

In [75]:
def predictOneValue(TEST,TRAIN_LIST,K):
  """Classify one row by majority vote among its K nearest training rows.

  Distances are Euclidean and computed over every element of a row except the
  last one, which is treated as the class label.

  Args:
    TEST: sequence of feature values followed by the row's label. The label is
      not used for the prediction; it is only echoed back as 'Actual'.
    TRAIN_LIST: non-empty list of training rows with the same layout.
    K: number of nearest neighbours that vote (must be positive). If K exceeds
      the number of training rows, all of them vote.

  Returns:
    dict with keys 'ValueCounts' (Counter of neighbour labels), 'Predicted'
    (the most frequent label; on a tie, the label seen first among the nearest
    neighbours wins) and 'Actual' (the last element of TEST).

  Raises:
    ValueError: if K is not positive or TRAIN_LIST is empty.
  """
  if K<=0:
    raise ValueError('K must be a positive integer, got %r'%(K,))
  if not TRAIN_LIST:
    raise ValueError('TRAIN_LIST must contain at least one row')
  neighbours=[]
  for TRAIN in TRAIN_LIST:
    squared_distance=0
    for i in range(len(TRAIN)-1):
      squared_distance+=(TRAIN[i]-TEST[i])**2
    # Rank on the exact squared distance: it orders rows the same way as the
    # true distance, whereas rounding before sorting creates artificial ties
    # that are then broken by training order rather than by proximity.
    neighbours.append([TRAIN,squared_distance])
  neighbours.sort(key=lambda pair:pair[1])
  nearest=neighbours[:K]
  value_counts=Counter(pair[0][-1] for pair in nearest)
  max_key=max(value_counts,key=value_counts.get)
  return {'ValueCounts':value_counts,'Predicted':max_key,'Actual':TEST[-1]}

In [76]:
predictOneValue(TEST_LIST[00],TRAIN_LIST,100)

{'ValueCounts': Counter({0.0: 46, 1.0: 23, 2.0: 31}),
 'Predicted': 0.0,
 'Actual': 0.0}

In [77]:
def predictAllValues(TEST_LIST,TRAIN_LIST,K):
  """Predict a label for every test row and report the overall accuracy.

  Args:
    TEST_LIST: list of test rows; each row ends with its true label.
    TRAIN_LIST: list of training rows passed through to predictOneValue.
    K: number of neighbours passed through to predictOneValue.

  Returns:
    Tuple ``(predictions, 'Accuracy :', accuracy)`` where ``predictions`` is
    the list of dicts returned by predictOneValue (one per test row) and
    ``accuracy`` is the percentage of rows whose 'Predicted' equals 'Actual',
    rounded to two decimals. An empty TEST_LIST yields an accuracy of 0.0
    instead of a division by zero.
  """
  predictions=[predictOneValue(test,TRAIN_LIST,K) for test in TEST_LIST]
  if not predictions:
    # Nothing to score: avoid dividing by zero.
    return predictions,'Accuracy :',0.0
  correct=sum(1 for op in predictions if op['Predicted']==op['Actual'])
  Accuracy=round(((correct/len(predictions))*100),2)
  return predictions,'Accuracy :',Accuracy



In [78]:
predictAllValues(TEST_LIST,TRAIN_LIST,10)

([{'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({1.0: 5, 2.0: 4, 0.0: 1}),
   'Predicted': 1.0,
   'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({1.0: 3, 0.0: 2, 2.0: 5}),
   'Predicted': 2.0,
   'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 3, 2.0: 6, 1.0: 1}),
   'Predicted': 2.0,
   'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 9, 1.0: 1}), 'Predicted': 0.0, 'Actual': 0.0},
  {'ValueCounts': Counter({0.0: 10}), 'Predicted': 0.0, 'Actual': 

In [79]:
predictOneValue([14.65, 4.67, 3.12, 36.0, 75.0, 2.92, 3.61, 0.7, 3.34, 3.6, 3.36, 7.21, 400.0, 'A'],TRAIN_LIST,5)

{'ValueCounts': Counter({1.0: 4, 2.0: 1}), 'Predicted': 1.0, 'Actual': 'A'}