In [2]:
import pandas as pd
import numpy as np
import collections

In [2]:
def construct_pandas_frame(html, attributes):
    '''
    Creats a pandas dataframe from a csv like data format from a csv
    Also assumes that the header is not in the csv representation nor the index name
    :param: html - string for the location of the csv like data on website
    :param: attributes - list of strings of the given data set in order
    :returns: pandas dataframe
    '''
    df = pd.read_csv(html, header = None)
    df.columns = attributes
    return df
    
    

In [3]:
class Perceptron(object):
    def __init__(self, eta, iters):
        '''
        simple constructor function
        :param: _iter - int, number of iterations
        :param: eta - int, acts as a scalar how much to change weights by
        '''
        self.eta = eta
        self.iters = iters
    
    def learn(self, row_vectors, output_vectors):
        '''
        Moves through each row of attributes, and finds a prediction.
        Then if necessary, updates the weights to see if 
        '''
        #Generate random number for the length of all rows
        generator = np.random.RandomState(1)
        
        #Because the output_vector and row_vector sizes are equal we just pick one to find size
        self.weights = generator.normal(loc=0.0, scale=.01, size = len(row_vectors[0])+1)
        for iter in range(self.iters):
            error = 0 #initializes error counter to be zero of iter
            
            for row_vector, output_vector in zip(row_vectors, output_vectors):
               
                #create a prediction using the weights and given row_vector
                prediction = self.predict(row_vector)
                error = error + np.where(output_vector == prediction,0,1) 
                for j in range(len(row_vectors[0])):
                    self.weights[j] = self.weights[j] + self.eta * (output_vector - prediction) * row_vector[j]
            print(f"error: {error}, weights {self.weights}")
            
    def predict(self, row_vector):
        '''
        Takes a row_vector and uses dot product across weights, if output is positive
        scales the prediction to be 1, else zeros
        :param: row_vector - vector, that contains attribute data about single sample
        :returns: a prediction as a 1 or -1
        '''
        input = np.dot(row_vector, self.weights[1:] + self.weights[0])
        prediction = np.where(input>=0, 1, -1)
        return prediction
        

In [4]:
html = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
attributes = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
df = construct_pandas_frame(html, attributes)


In [5]:
x = df.iloc[0:100,[0,2]].values #row vector
y = df.iloc[0:100,4].values #output vectors
y = np.where(y=='Iris-setosa',1,-1)
verify_count = collections.Counter(y) # This is just a quick check we have 50 setosa, 50 not setosa
verify_count
len(x[0])
#model = perceptron(eta = 0.1, iter =10)
#model.learn(x,y)

2

In [6]:
model = Perceptron(eta= .1, iters = 10)
model.learn(x,y)

error: 1, weights [-1.38375655 -0.94611756 -0.00528172]
error: 3, weights [-0.78375655 -1.32611756 -0.00528172]
error: 3, weights [-0.18375655 -1.70611756 -0.00528172]
error: 3, weights [ 0.41624345 -2.08611756 -0.00528172]
error: 3, weights [ 0.89624345 -2.52611756 -0.00528172]
error: 2, weights [ 0.51624345 -3.18611756 -0.00528172]
error: 3, weights [ 1.11624345 -3.56611756 -0.00528172]
error: 3, weights [ 1.71624345 -3.94611756 -0.00528172]
error: 3, weights [ 2.31624345 -4.32611756 -0.00528172]
error: 4, weights [  1.81624345e+00  -5.64611756e+00  -5.28171752e-03]


In [36]:
 df = pd.read_csv('winequality-red.csv')
df.shape

(1599, 12)

In [47]:
#df.loc[df['column_name'].isin(some_values)]
#df_8=df.loc[df['quality']==8]
df['alcohol']

0        9.4
1        9.8
2        9.8
3        9.8
4        9.4
5        9.4
6        9.4
7       10.0
8        9.5
9       10.5
10       9.2
11      10.5
12       9.9
13       9.1
14       9.2
15       9.2
16      10.5
17       9.3
18       9.0
19       9.2
20       9.4
21       9.7
22       9.5
23       9.4
24       9.7
25       9.3
26       9.5
27       9.5
28       9.4
29       9.8
        ... 
1569    11.5
1570    12.4
1571    11.1
1572     9.5
1573    12.5
1574    10.5
1575    11.8
1576    10.8
1577    11.9
1578    11.3
1579    11.3
1580    11.9
1581    11.3
1582    11.9
1583     9.8
1584    11.6
1585    11.5
1586    11.4
1587    10.9
1588    12.8
1589     9.2
1590    11.6
1591    11.6
1592    11.0
1593     9.5
1594    10.5
1595    11.2
1596    11.0
1597    10.2
1598    11.0
Name: alcohol, Length: 1599, dtype: float64

In [35]:
df_8.shape

(18, 12)

In [37]:
df_8

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
267,7.9,0.35,0.46,3.6,0.078,15.0,37.0,0.9973,3.35,0.86,12.8,8
278,10.3,0.32,0.45,6.4,0.073,5.0,13.0,0.9976,3.23,0.82,12.6,8
390,5.6,0.85,0.05,1.4,0.045,12.0,88.0,0.9924,3.56,0.82,12.9,8
440,12.6,0.31,0.72,2.2,0.072,6.0,29.0,0.9987,2.88,0.82,9.8,8
455,11.3,0.62,0.67,5.2,0.086,6.0,19.0,0.9988,3.22,0.69,13.4,8
481,9.4,0.3,0.56,2.8,0.08,6.0,17.0,0.9964,3.15,0.92,11.7,8
495,10.7,0.35,0.53,2.6,0.07,5.0,16.0,0.9972,3.15,0.65,11.0,8
498,10.7,0.35,0.53,2.6,0.07,5.0,16.0,0.9972,3.15,0.65,11.0,8
588,5.0,0.42,0.24,2.0,0.06,19.0,50.0,0.9917,3.72,0.74,14.0,8
828,7.8,0.57,0.09,2.3,0.065,34.0,45.0,0.99417,3.46,0.74,12.7,8


In [45]:
df_8.iloc[:,[9,10]]


Unnamed: 0,sulphates,alcohol
267,0.86,12.8
278,0.82,12.6
390,0.82,12.9
440,0.82,9.8
455,0.69,13.4
481,0.92,11.7
495,0.65,11.0
498,0.65,11.0
588,0.74,14.0
828,0.74,12.7


In [48]:
 df = pd.read_csv('iris.csv')

In [49]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa
8,4.4,2.9,1.4,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa
