# new life

In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
import pickle

# Load the Data from JSON Files
print("Loading data from JSON files...")
train_df = pd.read_json('train_df.json', orient='records', lines=True)  # Adjust the path as needed

# Text Preprocessing using TF-IDF
tfidf_vectorizer = TfidfVectorizer(max_features=5000)  # Adjust the number of features based on your dataset
X_text = tfidf_vectorizer.fit_transform(train_df['text']).toarray()  # Convert text to feature vectors

# Encode the text labels (if any)
label_encoder = LabelEncoder()
train_df['text_encoded'] = label_encoder.fit_transform(train_df['text'])

# Features (TF-IDF + other numerical features)
X = np.hstack((X_text, train_df[['quarter', 'year']].values))  # Combine TF-IDF features with other columns

# Target
y = train_df['value_standardized']  # Update with your target column

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the Features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Neural Network Model
print("Training Neural Network...")
nn_model = Sequential()
nn_model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
nn_model.add(Dropout(0.3))
nn_model.add(Dense(64, activation='relu'))
nn_model.add(Dropout(0.3))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(1, activation='linear'))  # Output layer with linear activation

# Compile the model
nn_model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Train the model
nn_model.fit(X_train_scaled, y_train, epochs=250, batch_size=32, validation_data=(X_test_scaled, y_test))

# Evaluate the Model
y_train_pred = nn_model.predict(X_train_scaled)
y_test_pred = nn_model.predict(X_test_scaled)

mse_train = mean_squared_error(y_train, y_train_pred)
r2_train = r2_score(y_train, y_train_pred)
mae_train = mean_absolute_error(y_train, y_train_pred)

mse_test = mean_squared_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

# Print Results
print(f"Neural Network - Training Metrics:")
print(f"  MSE: {mse_train:.4f}")
print(f"  R²: {r2_train:.4f}")
print(f"  MAE: {mae_train:.4f}")

print(f"Neural Network - Testing Metrics:")
print(f"  MSE: {mse_test:.4f}")
print(f"  R²: {r2_test:.4f}")
print(f"  MAE: {mae_test:.4f}")

# Save the Model, Scaler, LabelEncoder, and TF-IDF Vectorizer
print("Saving the model, scaler, label encoder, and tf-idf vectorizer...")
nn_model.save('nn_model.h5')
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
with open('label_encoder.pkl', 'wb') as le_file:
    pickle.dump(label_encoder, le_file)
with open('tfidf_vectorizer.pkl', 'wb') as tfidf_file:
    pickle.dump(tfidf_vectorizer, tfidf_file)

print("Model, scaler, label encoder, and tf-idf vectorizer saved successfully.")


Loading data from JSON files...
Training Neural Network...
Epoch 1/250


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 1.0388 - val_loss: 0.6478
Epoch 2/250
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.4794 - val_loss: 0.3444
Epoch 3/250
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4267 - val_loss: 0.2628
Epoch 4/250
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.5246 - val_loss: 0.2591
Epoch 5/250
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3292 - val_loss: 0.1846
Epoch 6/250
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2932 - val_loss: 0.1660
Epoch 7/250
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2653 - val_loss: 0.1804
Epoch 8/250
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2112 - val_loss: 0.1502
Epoch 9/250
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



Neural Network - Training Metrics:
  MSE: 0.0497
  R²: 0.9486
  MAE: 0.0988
Neural Network - Testing Metrics:
  MSE: 0.0870
  R²: 0.9234
  MAE: 0.1370
Saving the model, scaler, label encoder, and tf-idf vectorizer...
Model, scaler, label encoder, and tf-idf vectorizer saved successfully.


In [21]:
import pickle
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from keras.models import load_model

# Load the saved model, scaler, label encoder, and tf-idf vectorizer
nn_model = load_model('nn_model.h5')
with open('scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
with open('label_encoder.pkl', 'rb') as le_file:
    label_encoder = pickle.load(le_file)
with open('tfidf_vectorizer.pkl', 'rb') as tfidf_file:
    tfidf_vectorizer = pickle.load(tfidf_file)

# Function to preprocess user input and get the prediction
def process_user_input(quarter, year, text_input):
    # Convert text using the same TF-IDF vectorizer
    text_input_tfidf = tfidf_vectorizer.transform([text_input]).toarray()

    # Prepare the input features
    input_features = np.hstack((text_input_tfidf, np.array([[quarter, year]])))

    # Scale the features
    input_scaled = scaler.transform(input_features)

    # Make the prediction (standardized output)
    predicted_value_standardized = nn_model.predict(input_scaled)[0][0]

    # Unstandardize the predicted value
    # Create a dummy input for inverse_transform
    dummy_scaled = np.zeros_like(input_scaled)
    dummy_scaled[0, -1] = predicted_value_standardized  # Set only the last feature (output)

    # Use inverse_transform to unscale the predicted value
    predicted_value_original = scaler.inverse_transform(dummy_scaled)[0, -1]

    return predicted_value_original

# Chatbot interaction
print("Hello! I can help you predict a value based on the quarter, year, and text input.")
print("Type 'exit' to quit the chatbot.")

while True:
    try:
        # Get user input
        quarter_input = input("Enter the quarter (1-4): ")
        if quarter_input.lower() == 'exit':
            print("Goodbye!")
            break
        quarter = int(quarter_input)

        year_input = input("Enter the year: ")
        if year_input.lower() == 'exit':
            print("Goodbye!")
            break
        year = int(year_input)

        text_input = input("Enter the text (e.g., description or category): ")
        if text_input.lower() == 'exit':
            print("Goodbye!")
            break

        # Get the predicted value
        predicted_value = process_user_input(quarter, year, text_input)
        
        print(f"Predicted value: {predicted_value:.4f}")
        
    except Exception as e:
        print(f"Error: {e}")
        print("Please try again.")




Hello! I can help you predict a value based on the quarter, year, and text input.
Type 'exit' to quit the chatbot.


Enter the quarter (1-4):  2
Enter the year:  2024
Enter the text (e.g., description or category):  revenue


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
Predicted value: 4252.6802


Enter the quarter (1-4):  exit


Goodbye!
