In [None]:

#  Deep learning models built with TensorFlow or PyTorch may outperform a RandomForestRegressor in scenarios where the 
#  data has complex patterns or dependencies that are challenging for traditional machine learning models to capture. 
#  Here are some examples where deep learning models might be advantageous:

#  Image Recognition:
#  Deep learning models, especially convolutional neural networks (CNNs), excel in image recognition tasks. They can 
#  automatically learn hierarchical features from raw pixel values, capturing intricate patterns in images.

#  Natural Language Processing (NLP):
#  Recurrent Neural Networks (RNNs) or Transformer-based models (like BERT or GPT) are widely used for NLP tasks such as 
#  sentiment analysis, machine translation, and text generation. They can capture sequential dependencies and contextual 
#  information in text data.

#  Speech Recognition:
#  Deep learning models, particularly recurrent neural networks (RNNs) or deep neural networks (DNNs), are effective in 
#  speech recognition tasks. They can learn complex mappings from audio signals to textual representations.

#  Time Series Forecasting:
#  For time series data with long-term dependencies, recurrent neural networks (RNNs) or Long Short-Term Memory networks 
#  (LSTMs) may outperform traditional models. They can capture temporal patterns and dependencies over extended sequences.

#  Unstructured Data:
#  When dealing with unstructured data such as raw audio, video, or 3D data, deep learning models can automatically 
#  learn hierarchical representations without the need for handcrafted features.

#  Transfer Learning:
#  Pre-trained deep learning models, such as those trained on large image datasets (e.g., ImageNet), can be fine-tuned 
#  for specific tasks with limited labeled data. This is particularly beneficial when working with small datasets.

#  Generative Tasks:
#  Deep learning models, especially Generative Adversarial Networks (GANs) and Variational Autoencoders (VAEs), are used 
#  for generative tasks such as image generation, style transfer, and data synthesis.
     

In [15]:

# Here is a pretty simple scenario where a RandomForestRegressor, from the world of Scikit-Learn, outperforms the more
# complex neural-network models built with TensorFlow and PyTorch.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
import seaborn as sb
import scipy
from scipy.stats import spearmanr
from sklearn import datasets
import statsmodels.api as sm
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error


# Load the `mtcars` dataset through statsmodels' R-datasets helper and show
# which columns are available.
mtcars = sm.datasets.get_rdataset("mtcars", "datasets", cache=True).data
df = pd.DataFrame(mtcars)
print(df.columns)


# Select features (X) and target variable (y): miles-per-gallon is predicted
# from the ten remaining columns. The column list is kept in one place so the
# ad-hoc sample built further down is guaranteed to use the same columns in
# the same order.
FEATURE_COLUMNS = ['cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']
X = df[FEATURE_COLUMNS]
y = df['mpg']


# Split the data into training and testing sets.
# The training set (80%) is used to fit the model and the testing set (20%) is
# used to evaluate it; `random_state` pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Create a RandomForestRegressor model (100 trees, fixed seed).
model = RandomForestRegressor(n_estimators=100, random_state=42)


# The model is trained on the training set using the fit method.
model.fit(X_train, y_train)


# The trained model is used to make predictions on the testing set.
predictions = model.predict(X_test)

# Show the first row of the dataset (features, then its actual MPG) for reference.
print('----------')
print(X.iloc[0])
print('----------')
print(y.iloc[0])
print('----------')

# The Mean Absolute Error (MAE) is calculated to evaluate the model's performance. MAE represents the average
# absolute difference between the predicted and actual MPG values in the testing set.
mae = mean_absolute_error(y_test, predictions)
print(f'Mean Absolute Error: {mae}')


# Single hand-written sample to predict.
# NOTE(review): these values are almost identical to the first dataset row
# printed above (only `wt` and `qsec` are rounded). If that row ended up in
# the training split, this is not an out-of-sample prediction -- TODO confirm.
new_data = pd.DataFrame({'cyl': [6],
                         'disp': [160],
                         'hp': [110],
                         'drat': [3.90],
                         'wt': [2.6],
                         'qsec': [16.4],
                         'vs': [0],
                         'am': [1],
                         'gear': [4],
                         'carb': [4]})[FEATURE_COLUMNS]  # raises KeyError if a feature is missing

predicted_mpg = model.predict(new_data)
print(f'Predicted MPG: {predicted_mpg[0]:,.2f}')

# We know the actual MPG for the given independent variables is 21.0
# we see that the predicted value is 20.82
# The difference is: =21/20.82-1
# 0.86%! Not bad!!


Index(['mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear',
       'carb'],
      dtype='object')
----------
cyl       6.00
disp    160.00
hp      110.00
drat      3.90
wt        2.62
qsec     16.46
vs        0.00
am        1.00
gear      4.00
carb      4.00
Name: Mazda RX4, dtype: float64
----------
21.0
----------
Mean Absolute Error: 1.6839999999999964
Predicted MPG: 20.82


In [10]:

# Using TensorFlow:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Architecture: one hidden layer of 64 ReLU units feeding a single linear
# output unit. The network expects the 10 feature columns as input.
hidden_layer = Dense(64, activation='relu', input_shape=(10,))
output_layer = Dense(1)
model_tf = Sequential([hidden_layer, output_layer])

# Optimise mean squared error with Adam.
model_tf.compile(loss='mean_squared_error', optimizer='adam')

# Fit for 50 epochs, with 20% of the training rows set aside for validation.
# NOTE(review): the features are passed in unscaled (e.g. `disp` is in the
# hundreds while `drat` is below 10) and no random seed is set, so results vary
# between runs; both likely hurt this network's scores -- TODO confirm before
# drawing conclusions from the comparison.
model_tf.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=50,
)

# Loss on the held-out test split (the compiled loss, i.e. MSE).
loss = model_tf.evaluate(X_test, y_test)
print(f'Mean Squared Error: {loss}')

# Test-set predictions and their Mean Absolute Error.
predictions_tf = model_tf.predict(X_test)
mae_tf = mean_absolute_error(y_test, predictions_tf)
print(f'Mean Absolute Error (TensorFlow): {mae_tf}')

# Predict a single hand-written sample; values follow the training column
# order: cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb.
sample_values = [6, 160, 110, 3.90, 2.6, 16.4, 0, 1, 4, 4]
new_data_tf = np.array([sample_values])
predicted_mpg_tf = model_tf.predict(new_data_tf)
print(f'Predicted MPG (TensorFlow): {predicted_mpg_tf[0, 0]:,.2f}')

# We know the actual MPG for the given independent variables is 21.0
# we see that the predicted value is 15.94
# The difference is: =21/15.94-1
# 31.74%!!!! Not very accurate at all!!!! 




Epoch 1/50

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Mean Squared Error: 195.22267150878906
Mean Absolute Error (TensorFlow): 10.331711769104004
Predicted MPG (TensorFlow): 15.94


In [11]:

# Using PyTorch:
import torch
import torch.nn as nn
import torch.optim as optim

# Turn the pandas train/test splits into float32 tensors. The targets are
# reshaped into (n_samples, 1) column vectors so they line up with the
# single-output predictions when the MSE loss is computed.
X_train_torch = torch.tensor(X_train.values, dtype=torch.float32)
y_train_torch = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1)
X_test_torch = torch.tensor(X_test.values, dtype=torch.float32)
y_test_torch = torch.tensor(y_test.values, dtype=torch.float32).reshape(-1, 1)

# Define the model
class RegressionModel(nn.Module):
    """Small feed-forward regressor: one ReLU hidden layer, then a linear output.

    Args:
        in_features: Number of input features per sample. Defaults to 10,
            the number of feature columns used in this notebook.
        hidden_units: Width of the hidden layer. Defaults to 64.

    The forward pass maps a tensor whose last dimension is ``in_features``
    to a tensor whose last dimension is 1 (one predicted value per sample).
    """

    def __init__(self, in_features=10, hidden_units=64):
        super().__init__()
        # Layers are created in the same order as before (fc1, then fc2) so
        # random weight initialisation consumes the RNG identically.
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Return the prediction for ``x``: fc2(relu(fc1(x)))."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

model_pt = RegressionModel()

# Mean-squared-error objective, optimised with Adam at a learning rate of 1e-3.
criterion = nn.MSELoss()
optimizer = optim.Adam(model_pt.parameters(), lr=0.001)

# Full-batch training: each of the 50 epochs is a single gradient step computed
# on the entire training set.
# NOTE(review): no random seed is set and the inputs are unscaled, so the
# numbers printed below vary between runs and likely understate what this
# network can do -- TODO confirm before relying on the comparison.
for epoch in range(50):
    outputs = model_pt(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()

# Score the held-out split with gradient tracking switched off. The same
# forward pass supplies both the MSE and the NumPy predictions used for the MAE.
with torch.no_grad():
    outputs_test = model_pt(X_test_torch)
    loss_pt = criterion(outputs_test, y_test_torch)
    predictions_pt = outputs_test.numpy()

print(f'Mean Squared Error (PyTorch): {loss_pt.item()}')

# Mean Absolute Error on the test split.
mae_pt = mean_absolute_error(y_test, predictions_pt)
print(f'Mean Absolute Error (PyTorch): {mae_pt}')

# Predict a single hand-written sample; values follow the training column
# order: cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb.
new_data_pt = torch.tensor([[6, 160, 110, 3.90, 2.6, 16.4, 0, 1, 4, 4]], dtype=torch.float32)
with torch.no_grad():
    predicted_mpg_pt = model_pt(new_data_pt).item()
print(f'Predicted MPG (PyTorch): {predicted_mpg_pt:,.2f}')

# We know the actual MPG for the given independent variables is 21.0
# we see that the predicted value is 10.62
# The difference is: =21/10.62-1
# 97.74%!!!! That's horrible!!!! 

Mean Squared Error (PyTorch): 163.92013549804688
Mean Absolute Error (PyTorch): 10.22669335774013
Predicted MPG (PyTorch): 10.62


In [12]:

# In the experiments shown above, the RandomForest Regressor performed significantly better in terms of MAE compared to 
# the deep learning models implemented with TensorFlow and PyTorch. A lower MAE indicates better predictive performance, 
# and in this case, the RandomForest Regressor achieved a more accurate prediction on the test set.

# However, it's essential to consider the context and the complexity of the models. Deep learning models might require 
# more tuning, a larger dataset, or a more complex architecture to outperform traditional machine learning models like 
# RandomForest. In some cases, deep learning models shine when dealing with complex patterns and large datasets, while 
# simpler models might suffice for smaller or less complex datasets.

# In practice, the choice of model depends on various factors such as dataset size, feature complexity, computational 
# resources, interpretability, and the specific requirements of the task at hand. Each model type has its strengths and 
# weaknesses, and the best choice often depends on the specific characteristics of the problem you are trying to solve.

# In summary, simpler models like RandomForest Regressor may perform well, especially with smaller datasets or when 
# interpretability is crucial. The effectiveness of deep learning models often comes into play when dealing with large, 
# complex datasets with intricate patterns that are challenging for traditional models to capture.
