In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt # to plot error during training

In [325]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so
    large-magnitude negative inputs do not overflow ``np.exp`` (the direct
    formula emits a ``RuntimeWarning`` for them).

    Parameters
    ----------
    x : scalar or array-like of real numbers
        Input value(s). Non-floating input (ints, bools) is promoted to float.

    Returns
    -------
    A NumPy float scalar for scalar input, otherwise an ndarray with the
    same shape as ``x``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Promote ints/bools so the exponential is taken in floating point.
        x = x.astype(float)
    decay = np.exp(-np.abs(x))  # exponent is never positive, so no overflow
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- same function, rearranged.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d array into a scalar and leaves n-d arrays as they are.
    return result[()]

# Worked example: squash the dot product of a sample and a weight vector.
x = np.array([1, 2, 3])
w = np.array([0.1, 1, 0.1])
sigmoid(x.dot(w))



0.9168273035060777

In [161]:
class NeuralNetwork:
    """Bias-free feed-forward network: input -> sigmoid hidden layer -> sigmoid output.

    Weights are drawn uniformly from [0, 1) with ``np.random.rand`` and are
    updated with full-batch gradient steps on the squared error between the
    targets and the network output.

    Parameters
    ----------
    inputSize : int
        Number of input features (rows of ``W1``).
    outputSize : int
        Number of output units (columns of ``W2``).
    hiddenSize : int
        Number of hidden units.
    learningRate : float
        Multiplier applied to every weight update. The default of 1.0
        reproduces the original un-scaled update.
    """

    def __init__(self, inputSize=3, outputSize=1, hiddenSize=2, learningRate=1.0):
        self.inputSize = inputSize
        self.outputSize = outputSize
        self.hiddenSize = hiddenSize
        self.learningRate = learningRate
        self.W1 = np.random.rand(self.inputSize, self.hiddenSize)
        self.W2 = np.random.rand(self.hiddenSize, self.outputSize)

    def sigmoid(self, x, deriv=False):
        """Logistic activation, or its derivative when ``deriv`` is true.

        With ``deriv`` set, ``x`` must already be a sigmoid *output* ``s``;
        the method then returns ``s * (1 - s)``. Otherwise it returns
        ``1 / (1 + exp(-x))``, computed via ``exp(-|x|)`` so that very
        negative inputs cannot overflow.
        """
        if deriv:
            return x * (1 - x)
        x = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(x))  # exponent is never positive, so no overflow
        result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # () unwraps 0-d results into scalars and leaves n-d arrays untouched.
        return result[()]

    def forward(self, X):
        """Run a forward pass and return the output activations.

        Intermediate values are stored on the instance for ``backward``:
        ``z`` (hidden pre-activation), ``z2`` (hidden activation) and
        ``z3`` (output pre-activation).
        """
        self.z = np.matmul(X, self.W1)
        self.z2 = self.sigmoid(self.z)
        self.z3 = np.matmul(self.z2, self.W2)
        return self.sigmoid(self.z3)

    def backward(self, X, y, o):
        """Back-propagate the error ``y - o`` and update ``W1`` and ``W2`` in place.

        Must be called right after ``forward(X)`` (it reads ``self.z2``) with
        2-D ``X`` of shape (samples, inputSize) and ``y``/``o`` of shape
        (samples, outputSize).
        """
        self.o_error = y - o
        self.o_delta = self.o_error * self.sigmoid(o, True)
        # Hidden-layer error uses W2 as it was during the forward pass,
        # so it is computed before either weight matrix is modified.
        self.z2_error = np.matmul(self.o_delta, np.transpose(self.W2))
        self.z2_delta = self.z2_error * self.sigmoid(self.z2, True)
        self.W1 += self.learningRate * np.matmul(np.transpose(X), self.z2_delta)
        self.W2 += self.learningRate * np.matmul(np.transpose(self.z2), self.o_delta)

    def train(self, X, y, epochs=10000):
        """Run ``epochs`` full-batch forward/backward passes over ``(X, y)``.

        A single 1-D sample is treated as a batch of one, and ``y`` is
        reshaped to one row per sample, so 1-D label vectors are accepted.
        """
        X = np.atleast_2d(np.asarray(X, dtype=float))
        y = np.asarray(y, dtype=float).reshape(X.shape[0], -1)
        for _ in range(epochs):
            self.backward(X, y, self.forward(X))

    def predict(self, x_predicted):
        """Return the network output for ``x_predicted``.

        A single-valued result is returned as a Python float (as before);
        a batch that yields several values is returned as an ndarray
        instead of raising from ``.item()``.
        """
        out = self.forward(x_predicted)
        return out.item() if np.size(out) == 1 else out

In [354]:
# Features: a single random 3-feature sample, repeated for all 100 rows.
inputs = np.array([np.random.rand(3)] * 100)
# Targets: a (100, 1) column holding 75 ones followed by 25 zeros.
outputs = np.array([1] * 75 + [0] * 25).reshape(-1, 1)

# Fit the network on the repeated sample and query it on that same sample.
NN = NeuralNetwork()
NN.train(X=inputs, y=outputs)
NN.predict(x_predicted=inputs[0])

0.7499999999999992

In [365]:
# Features: a single random 3-feature sample, repeated for all 100 rows.
inputs = np.array([np.random.rand(3)] * 100)
# Targets: a (100, 1) column holding 75 zeros followed by 25 ones.
outputs = np.array([0] * 75 + [1] * 25).reshape(-1, 1)

# Fit scikit-learn's logistic regression and ask for class probabilities
# at a freshly drawn random point.
clf = LogisticRegression()
clf.fit(inputs, outputs.ravel())
clf.predict_proba([np.random.rand(3)])

array([[0.75000006, 0.24999994]])

In [5]:
import requests
from bs4 import BeautifulSoup
import time

In [2]:
def getWikiLinks(page, timeout=10):
    """Fetch an English Wikipedia page and return the titles of its wiki links.

    A link is kept when it is an ``<a>`` inside the element with id
    ``bodyContent``, its ``href`` contains ``/wiki/``, it carries no
    ``class`` attribute, and it has a ``title`` attribute. Anchors without
    a ``title`` are skipped instead of aborting the whole page with a
    ``KeyError``.

    Parameters
    ----------
    page : str
        Page name appended verbatim to ``https://en.wikipedia.org/wiki/``.
    timeout : float
        Seconds to wait for the server before ``requests`` gives up, so a
        stalled connection cannot hang the caller indefinitely.

    Returns
    -------
    list of str
        ``title`` attribute of every qualifying link, in document order
        (duplicates are kept). Empty when the page has no ``bodyContent``
        element.

    Raises
    ------
    requests.RequestException
        On connection failures, timeouts, or a 4xx/5xx HTTP status.
    """
    response = requests.get(
        url="https://en.wikipedia.org/wiki/" + page,
        timeout=timeout,
    )
    # Do not scrape links out of an error page (e.g. a 404 for a bad title).
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    body = soup.find(id="bodyContent")
    if body is None:
        return []

    wikiLinks = []
    for link in body.find_all("a"):
        href = link.get("href")
        if href is None or "/wiki/" not in href or link.has_attr("class"):
            continue
        title = link.get("title")
        if title is not None:
            wikiLinks.append(title)

    return wikiLinks

In [6]:
# Crawl every page named in vocab_dev.txt (each stripped line is passed to
# getWikiLinks) and record how long each batch of 20 pages takes.
times = []  # elapsed seconds between consecutive checkpoints

with open('vocab_dev.txt', 'r') as vocab:
    limit = -1  # stop after this many pages; a non-positive value means no limit
    i = 0
    temp_time = time.time()
    for page in vocab:
        title = page.strip()
        try:
            getWikiLinks(title)
        except Exception as err:
            # Report which page failed and why, then keep crawling.
            # Catching Exception (not a bare except) lets Ctrl-C interrupt the run.
            print(title, repr(err))

        # Checkpoint lives outside the try so a failing page cannot skip it
        # and silently merge two timing windows.
        if i % 20 == 0:
            ts = time.time() - temp_time
            times.append(ts)
            temp_time = time.time()
            print(i, title, ts)

        i += 1
        if i >= limit and limit > 0:
            break

0 Texas_annexation

0.11857008934020996
20 Gerard_Manley_Hopkins

3.1813228130340576
40 Anemia

3.469252109527588
60 Pedro_II_of_Brazil

2.6918060779571533
80 Density

2.8585779666900635
100 Mrs_Dalloway

3.49611496925354
120 Ossification

2.675758123397827
Mikhail_Bulgakov

140 Daniel_Kahneman

2.5574278831481934
160 Druze

2.109994888305664
180 French_horn

3.465520143508911
200 Mali_Empire

2.5991508960723877
Indian_National_Congress

220 Jim_(given_name)

2.514065980911255
