# Tech Company Fundings

### Importing Libraries 

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import *

In [None]:
# Load the tech funding dataset from the CSV file into a DataFrame.
tech_fund = pd.read_csv("tech-fundings1.csv")

### EDA

In [None]:
# Preview the first rows of the raw dataset.
tech_fund.head()

In [None]:
# Drop the columns considered irrelevant to the analysis, then preview the result
irrelevant_data = [
    'index',
    'Website',
    'Funding Date',
    'Company',
]
tech_fund.drop(columns=irrelevant_data, inplace=True)

tech_fund.head()

In [None]:
# Show column dtypes and non-null counts.
tech_fund.info()

In [None]:
# Descriptive statistics per Region.
tech_fund.groupby('Region').describe()

In [None]:
# Descriptive statistics per Funding Stage.
tech_fund.groupby('Funding Stage').describe()

### Feature Engineering 

In [None]:
# Keep only rows whose Region, Funding Stage and Funding Amount hold real values,
# i.e. discard the 'Unknown' / 'Undisclosed' placeholders.
region_known = tech_fund['Region'].ne('Unknown')
stage_known = ~tech_fund['Funding Stage'].isin(['Unknown', 'Undisclosed'])
amount_known = tech_fund['Funding Amount (USD)'].ne('Unknown')

tech_fund = tech_fund.loc[region_known & stage_known & amount_known]

In [None]:
# List the distinct categories remaining in each categorical column of interest
for column in ("Funding Stage", "Region"):
    print(column)
    print(tech_fund[column].unique())

In [None]:
# Remove missing (NaN) values: drop, in place, every row with at least one missing entry
tech_fund.dropna(inplace=True)

In [None]:
# Review the dataset after the changes (column dtypes and non-null counts)
tech_fund.info()

In [None]:
# Cast the funding amount column from string to float
amount_column = 'Funding Amount (USD)'
tech_fund[amount_column] = tech_fund[amount_column].astype(float)

In [None]:
# Verify the dtype conversion of Funding Amount (USD)
tech_fund.info()

In [None]:
# One-hot encode the categorical columns, then show the resulting column index
categorical_columns = ['Funding Stage', 'Region', 'Vertical']
techfund_encoded = pd.get_dummies(tech_fund, columns=categorical_columns)
techfund_encoded.columns

In [None]:
# Print every encoded column name on its own line (the Index repr truncates long lists)
for column_name in techfund_encoded.columns:
    print(column_name)

In [None]:
# Descriptive statistics of the encoded dataset.
techfund_encoded.describe()

In [None]:
# Separate the regression target (funding amount) from the predictor columns
target_column = 'Funding Amount (USD)'
labels_techfund = techfund_encoded[target_column]
features_techfund = techfund_encoded.drop(columns=[target_column])

In [None]:
# Preview the first rows of the feature matrix.
features_techfund.head()

# Machine Learning Models 

#### Objectives:

1. To examine how much funding companies receive based on their Vertical, Funding Stage, and Region, and to determine which companies receive the most funding.

2. To find and choose the model that best fits our data and gives the most accurate results.

### Machine Learning Model (Baseline) - Gradient Boosting Regressor

In [None]:
# Train/test split: hold out 40% of the rows for testing, with a fixed seed
trainF, testF, trainL, testL = train_test_split(
    features_techfund,
    labels_techfund,
    test_size=0.4,
    random_state=33,
)

In [None]:
# Gradient Boosting Regressor (baseline model)
model_gbr = GradientBoostingRegressor(
    n_estimators=1000,      # number of boosting stages
    learning_rate=0.1,      # shrinkage applied to each tree's contribution
    max_depth=6,            # maximum depth of each regression tree
    min_samples_leaf=9,     # minimum number of samples required at a leaf
    max_features=0.1,       # fraction of features considered at each split
    loss='huber',           # Huber loss
    # max_features < 1.0 makes the per-split feature sampling random, so pin
    # the seed (same value as the train/test split) to get reproducible metrics.
    random_state=33,
)

In [None]:
# Fit the gradient boosting model on the training split
model_gbr.fit(trainF, trainL)

### Model Metrics (Baseline) - Train and Evaluate Model

In [None]:
# Training-set metrics for the baseline model
predicted_labels_train = model_gbr.predict(trainF)
train_mae = mean_absolute_error(trainL, predicted_labels_train)
train_r2 = r2_score(trainL, predicted_labels_train)
print("MAE", train_mae)
print("R^2", train_r2)

In [None]:
# Test-set metrics for the baseline model
predicted_labels_test = model_gbr.predict(testF)
test_mae = mean_absolute_error(testL, predicted_labels_test)
test_r2 = r2_score(testL, predicted_labels_test)
print("MAE", test_mae)
print("R^2", test_r2)

### Deep Learning (Improved) - Neural Network

In [None]:
# Size the input from the data instead of hard-coding 229, so the network
# still builds if the one-hot encoding yields a different number of columns.
n_features = features_techfund.shape[1]

model_dl = Sequential()

# First hidden layer (also declares the input width)
model_dl.add(
    Dense(50, input_dim=n_features, activation='relu')
)

# Further hidden layers
model_dl.add(
    Dense(100, activation='relu')
)
model_dl.add(
    Dense(50, activation='relu')
)

# Output layer: a single linear unit for the regression target
model_dl.add(
    Dense(1, activation='linear')
)

In [None]:
# Adam (Adaptive Moment Estimation) is the optimizer that updates the network's weights during training
# (it tunes model parameters, not hyperparameters). It is a gradient-descent variant with adaptive step
# sizes and is not guaranteed to reach a global minimum. The loss being minimized is mean squared error.

model_dl.compile(
    loss='mean_squared_error',
    optimizer='adam'
)

### Model Metrics (Improved) - Train and Evaluate Model


In [None]:
# Train on the training split only. Fitting on the full dataset would expose
# the test rows to the network, so the test MSE would no longer measure
# performance on unseen data.
model_dl.fit(trainF, trainL, epochs=50, shuffle=True, verbose=2)

In [None]:
# Mean squared error of the network on the training split
mse_train = model_dl.evaluate(x=trainF, y=trainL, verbose=0)
print('MSE Train: ', mse_train)

In [None]:
# Mean squared error of the network on the test split
mse_test = model_dl.evaluate(x=testF, y=testL, verbose=0)
print('MSE Test: ', mse_test)

In [None]:
# Predict funding amounts with the trained network.
# Feed the feature matrix only: techfund_encoded still contains the
# 'Funding Amount (USD)' label column, which is not a model input (it would
# change the input width and leak the target into the prediction).
predictes_fundingsamount = model_dl.predict(features_techfund)

In [None]:
# Take the first predicted funding amount. The predictions live in
# `predictes_fundingsamount`; `predictes_sales` is not defined in this notebook.
prediction_scalar = predictes_fundingsamount[0][0]

# The labels were used as raw 'Funding Amount (USD)' values (no scaler was
# fitted or applied before training), so the network output is already in USD.
# No offset/divisor "unscaling" is applied, since there is no scaling to undo.
prediction_unscaled = prediction_scalar
prediction_unscaled