In [29]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, OrdinalEncoder


In [31]:
df = pd.read_csv('/content/drive/MyDrive/oracle/ClosedStack_data_Virtual.csv', low_memory= False, encoding='latin-1')

In [32]:
df_new = df[[ 'Job ID','Hardware ID', 'Hardware Name', 'Solution ID', 'Ticket Site','Site Name', 'Job Created']]

In [62]:
df_new_3 = df_new.loc[(df_new['Job Created'] <= "2022-09-01 00:00:00.000000") & (df_new['Job Created'] >= "2020-08-02 00:00:00.000000")]
df_filtered1 = df_new_3.dropna(subset=['Job ID', 'Job Created', 'Solution ID']).copy()
df_filtered1.loc[:, 'Job Created'] = pd.to_datetime(df_filtered1['Job Created']).dt.strftime('%Y-%m-%d')


In [63]:
# One-Hot encode the 'Solution ID' column
onehot_encoder = OneHotEncoder()
solution_id_encoded = onehot_encoder.fit_transform(df_filtered1[['Solution ID']]).toarray()

In [64]:
# One-Hot encode the categorical columns (in this case, 'Job ID')
categorical_cols = ['Job ID']
encoder = OneHotEncoder()
categorical_encoded = encoder.fit_transform(df_filtered1[categorical_cols]).toarray()

In [65]:
onehot_encoder = OneHotEncoder()
jobcreated_encoded = onehot_encoder.fit_transform(df_filtered1[['Job Created']]).toarray()

In [66]:
# Concatenate one-hot encoded 'Solution ID' and categorical columns
X_categorical = np.concatenate([categorical_encoded, solution_id_encoded], axis=1)

In [67]:
y = np.concatenate([jobcreated_encoded], axis=1)

In [68]:
# PCA for combined features
pca_categorical = PCA(n_components=50)
X_catagorical_pca = pca_categorical.fit_transform(X_categorical)

In [69]:
# Split the data into training and testing sets
X_train2, X_test2, y_train2, y_test2 = train_test_split(X_catagorical_pca, y, test_size=0.3, random_state=42)

X_train2 = X_train2.astype(np.float64)
X_test2 = X_test2.astype(np.float64)

y_train2 = y_train2.astype(np.float64)
y_test2 = y_test2.astype(np.float64)

In [70]:
# Convert NumPy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train2, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train2, dtype=torch.float32)
if y_train_tensor.dim() == 3:
    y_train_tensor = y_train_tensor.squeeze(2)

X_test_tensor = torch.tensor(X_test2, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test2, dtype=torch.float32)
if y_test_tensor.dim() == 3:
    y_test_tensor = y_test_tensor.squeeze(2)

In [71]:
# Create DataLoader for training and testing data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [75]:
# Define custom neural network model
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)  # Input size is the number of features after PCA
        self.fc2 = nn.Linear(128, 581)  # Output size is 1 for regression task

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [76]:
# Initialize the model, loss function, and optimizer
model = NeuralNetwork(input_size=X_catagorical_pca.shape[1])
criterion = nn.MSELoss()  # Mean Squared Error loss for regression task
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [77]:
# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

In [46]:
# Evaluate the model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    mse = criterion(test_outputs, y_test_tensor)
    print("Mean Squared Error on Test Data:", mse.item())

Mean Squared Error on Test Data: 0.0016873020213097334


In [47]:
df_new_2 = df_new.loc[(df_new['Job Created'] <= "2020-08-01 00:00:00.000000") & (df_new['Job Created'] >= "2019-01-01 00:00:00.000000")]
df_filtered = df_new_2.dropna(subset=['Job ID', 'Job Created', 'Solution ID']).copy()
df_filtered.loc[:, 'Job Created'] = pd.to_datetime(df_filtered['Job Created']).dt.strftime('%Y-%m-%d')


In [48]:
# One-Hot encode the 'Solution ID' column
onehot_encoder = OneHotEncoder()
solution_id_encoded = onehot_encoder.fit_transform(df_filtered[['Solution ID']]).toarray()

In [49]:
# One-Hot encode the categorical columns (in this case, 'Job ID')
categorical_cols = ['Job ID']
encoder = OneHotEncoder()
categorical_encoded = encoder.fit_transform(df_filtered[categorical_cols]).toarray()

In [50]:
onehot_encoder = OneHotEncoder()
jobcreated_encoded = onehot_encoder.fit_transform(df_filtered[['Job Created']]).toarray()

In [51]:
# Concatenate one-hot encoded 'Solution ID' and categorical columns
X_categorical = np.concatenate([categorical_encoded, solution_id_encoded], axis=1)

In [52]:
y = np.concatenate([jobcreated_encoded], axis=1)

In [53]:
# PCA for combined features
pca_categorical = PCA(n_components=50)
X_catagorical_pca = pca_categorical.fit_transform(X_categorical)

In [54]:
# Split the data into training and testing sets
X_train1, X_test1, y_train1, y_test1 = train_test_split(X_catagorical_pca, y, test_size=0.3, random_state=42)

X_train1 = X_train1.astype(np.float64)
X_test1 = X_test1.astype(np.float64)

y_train1 = y_train1.astype(np.float64)
y_test1 = y_test1.astype(np.float64)

In [55]:
# Convert NumPy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train1, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train1, dtype=torch.float32)
if y_train_tensor.dim() == 3:
    y_train_tensor = y_train_tensor.squeeze(2)

X_test_tensor = torch.tensor(X_test1, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test1, dtype=torch.float32)
if y_test_tensor.dim() == 3:
    y_test_tensor = y_test_tensor.squeeze(2)

In [56]:
# Create DataLoader for training and testing data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [57]:
# Define custom neural network model
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)  # Input size is the number of features after PCA
        self.fc2 = nn.Linear(128, 409)  # Output size is 1 for regression task

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [58]:
# Initialize the model, loss function, and optimizer
model = NeuralNetwork(input_size=X_catagorical_pca.shape[1])
criterion = nn.MSELoss()  # Mean Squared Error loss for regression task
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [59]:
# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

In [60]:
# Evaluate the model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    mse = criterion(test_outputs, y_test_tensor)
    print("Mean Squared Error on Test Data:", mse.item())

Mean Squared Error on Test Data: 0.0024019854608923197


In [61]:
combined_X_train = np.concatenate([X_train1, X_train2], axis=0)
combined_y_train = np.concatenate([y_train1, y_train2], axis=0)

combined_X_test = np.concatenate([X_test1, X_test2], axis=0)
combined_y_test = np.concatenate([y_test1, y_test2], axis=0)


ValueError: ignored

In [None]:
df['Solution ID'].unique()