In [1]:
import numpy as np

def relu_backward(z):
    return (z > 0).astype(int)

def tanh_backward(z):
    return 1 - z**2

def initialize_deep_layers(layers_dims):
    
    #with he -initilization but have to find out why it works better than random initialization
    
    L = len(layers_dims)
    parameters = {}
    
    for i in range(1,L):
        
        parameters[f'W{i}'] = np.random.randn(layers_dims[i], layers_dims[i-1]) * np.sqrt(1/layers_dims[i-1])
        parameters[f'b{i}'] = np.zeros((layers_dims[i],1))
        
    return parameters

def L_backward_propopgation(parameters, cache, X, Y, activation='tanh', lambd=0.0):
    
    m = X.shape[1]
    L = len(parameters) // 2
    grads = {}
    
    for i in range(L,0,-1):
        if i == L:
            #assummption last layers activation func is sigmoid
            #otherwise remove this block make changes in grad func in next block according to your choice
            grads[f'dZ{i}'] = cache[f'A{i}'] - Y
            
        else:
            grads[f'dA{i}'] = np.dot(parameters[f'W{i+1}'].T, grads[f'dZ{i+1}'] )
            
            grad_func = relu_backward if activation == 'relu' else tanh_backward
            
            grads[f'dZ{i}'] = grads[f'dA{i}'] * grad_func( cache[f'A{i}'] )
            
        if i != 1 :
            grads[f'dW{i}'] = 1./m * np.dot(grads[f'dZ{i}'] ,cache[f'A{i-1}'].T) 
        else:
            grads[f'dW{i}'] = 1./m * np.dot(grads[f'dZ{i}'] ,X.T)
            
        grads[f'dW{i}'] = grads[f'dW{i}'] + (lambd/m) *(parameters[f'W{i}'])
        
        grads[f'db{i}'] = 1./m * np.sum(grads[f'dZ{i}'] ,axis=1, keepdims=True)
        
    return grads
    
    

In [2]:
params = initialize_deep_layers([3,4,2,1])

In [3]:
def sigmoid( z ) :
    return 1 / ( 1 + np.exp( -z ) )

In [4]:
def L_feed_forward(paramters, X):
    
    L = len(paramters) //2
    
    cache = {}
    
    for l in range(1,L+1):
        
        if l==1:
            cache[f'Z{l}'] = np.dot(paramters[f'W{l}'],X) + paramters[f'b{l}']
        else:
            cache[f'Z{l}'] = np.dot(paramters[f'W{l}'],cache[f'A{l-1}']) + paramters[f'b{l}']
            
        if l!=L:
            cache[f'A{l}'] = np.tanh(cache[f'Z{l}'])
        else:
            cache[f'A{L}'] = sigmoid(cache[f'Z{l}'])

    return cache[f'A{L}'] , cache



In [5]:
def compute_cost(AL , Y ,parameters, lambd =0.0):
    
    #if lambd == 0 then it should mean that you dont want to use regularization
    
    m= Y.shape[1]
    
    logprobs = ( Y * np.log( AL ) ) + ( ( 1 - Y ) * np.log( 1 - AL ) )
    cost = -1./m * np.sum(logprobs)
    cost = float(np.squeeze(cost))
    
    regularized_cost = 0
    
    L = len(parameters) // 2
    
    for i in range(1,L+1):
        
        regularized_cost += np.sum(np.square(parameters[f'W{i}']))
        
    regularized_cost = (lambd / (2*m) ) * regularized_cost
    
    return cost + regularized_cost

In [6]:
def update_parameters(parameters, grads, learning_rate = 0.01):
    
    L = len(parameters) // 2
    for i in range(1,L+1):
        parameters[f'W{i}'] = parameters[f'W{i}'] - learning_rate* (grads[f'dW{i}'])
        parameters[f'b{i}'] = parameters[f'b{i}'] - learning_rate* (grads[f'db{i}'])
        
    return parameters


In [7]:
def model(X,Y,layers_dims,learning_rate=0.01, lambd=0.0, num_iterations = 200, print_cost_every_n_steps=1):
    print(lambd,'lambda')
    parameters = initialize_deep_layers(layers_dims)
    
    for i in range(num_iterations):
        
        AL,cache = L_feed_forward(parameters,X)

        cost = compute_cost(AL, Y, parameters,lambd=lambd)
        
        if i % print_cost_every_n_steps == 0:
            print(f'cost at iteration {i} = {cost}' )

        grads = L_backward_propopgation(parameters, cache, X, Y,lambd=lambd)

        parameters = update_parameters(parameters, grads, learning_rate)
        
    return parameters

In [8]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

In [9]:
tweets_dataset = pd.read_csv( '~/NLP/NLP_Disaster_tweets/train.csv')
from nltk.stem import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer

lemmatizer = WordNetLemmatizer()
stemmer = SnowballStemmer( 'english')

def num_remover( val):
    tokens = val.split()
    nums = [ str(i) for i in range(10)]
    final_tokens = []
    for token in tokens:
        token = token.strip()
        if not any( token.startswith( num ) for num in nums):
            final_tokens.append(token)
    return ' '.join(final_tokens)


def replace_urls(text):
    tokens = text.split()
    
    final_tokens = []
    
    for token in tokens:
        if token.lower().startswith('http'):
            final_tokens.append('url')
        elif token.lower().startswith('@'):
            final_tokens.append('taggeduser')
        else:
            final_tokens.append(token)
    return ' '.join(final_tokens)


def clean_text(df):

    replace_words = [ '&amp' , 'and' , '#' ]

    df['text'] = df['text'].apply(replace_urls)

    for word in replace_words :
        df[ 'text' ] = df[ 'text' ].str.replace( word , '' )

    df[ 'text' ] = df['text' ].apply( lambda txt : ' '.join( stemmer.stem(lemmatizer.lemmatize( word ) ) for word in txt.split( ' ') ) )

    df['keyword'] = df['keyword'].fillna('').str.replace('%20' , ' ')
    df[ 'text' ] = df.apply( lambda row : str( row[ 'text' ] ) + ' ' + str(row[ 'keyword' ]) if row[ 'keyword' ] else row[ 'text' ] , axis = 1)

    df['text'] = df['text'].apply(num_remover)
    
    return df

In [10]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer , TfidfTransformer
from sklearn.preprocessing import StandardScaler

class MyDataTransformer(BaseEstimator, TransformerMixin):
    
    def __init__(self):
        self.count_vectorizer = CountVectorizer()
        self.standard_scaler = StandardScaler()
    
    def fit(self, X, y=None):
        
        x = clean_text(X)
        x = self.count_vectorizer.fit_transform(X['text'])
        x = x.toarray()
        x = self.standard_scaler.fit(x)
        return self
        
    def transform(self, X, y=None):
        
        x = clean_text(X)
        x = self.count_vectorizer.transform(X['text'])
        x = x.toarray()
        x = self.standard_scaler.transform(x)
        x = x.T
        return x

In [11]:
import warnings
warnings.filterwarnings('ignore')

In [20]:
train_df = pd.read_csv( '~/NLP/NLP_Disaster_tweets/train.csv')

In [21]:
X_train, X_test, y_train, y_test= train_test_split(train_df, train_df['target'])

In [22]:
my_data_transformer = MyDataTransformer()

X_train = my_data_transformer.fit_transform(X_train)

y_train = np.array(y_train).reshape(1,-1)

In [23]:

layers_dims = [X_train.shape[0],256, 128, 1]

In [24]:
parameters = model(X_train,y_train,layers_dims,learning_rate=0.09, lambd=1.0,num_iterations = 20)

1.0 lambda
cost at iteration 0 = 0.7572016821141467
cost at iteration 1 = 0.7238679518735981
cost at iteration 2 = 0.6948694248669677
cost at iteration 3 = 0.668881489663552
cost at iteration 4 = 0.645025065297518
cost at iteration 5 = 0.6227083300956007
cost at iteration 6 = 0.601529884734578
cost at iteration 7 = 0.5812182079842272
cost at iteration 8 = 0.561592514216844
cost at iteration 9 = 0.5425364305790784
cost at iteration 10 = 0.5239795625880667
cost at iteration 11 = 0.5058840988463813
cost at iteration 12 = 0.4882347954692656
cost at iteration 13 = 0.4710313622675012
cost at iteration 14 = 0.4542826593739638
cost at iteration 15 = 0.4380023264003614
cost at iteration 16 = 0.42220557853099855
cost at iteration 17 = 0.40690695995018467
cost at iteration 18 = 0.3921188729157499
cost at iteration 19 = 0.37785071790123553


In [25]:
X_test = my_data_transformer.transform(X_test)

y_test = np.array(y_test).reshape(1,-1)

In [26]:
y_pred, _ = L_feed_forward(parameters,X_test)

y_pred = np.where(y_pred>0.5,1,0)

y_pred

y_test

from sklearn.metrics import accuracy_score
accuracy_score(y_test[0], y_pred[0])

0.7752100840336135

In [27]:
y_pred, _ = L_feed_forward(parameters,X_train)

y_pred = np.where(y_pred>0.5,1,0)

y_pred

y_train

from sklearn.metrics import accuracy_score
accuracy_score(y_train[0], y_pred[0])

0.9194254685584166