## **5.1**:
**Objective:** To forecast future values of a univariate time series using LSTM-based models.

**Group members:**
1. Vivek Borade (PRN 202201040216)
2. Nirmal Chaturvedi (PRN 202201040210)
3. Abhijeet Jadhav (PRN 202201040122)

### Step 1: Import Libraries

In [None]:
! pip install q kaggle
from google.colab import files
import pandas as pd
import numpy as np
from google.colab import autoviz
import seaborn as sns
files.upload()
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from keras.models import Sequential
from keras.layers import LSTM, Dense

### Step 2: Load Dataset

In [None]:
import kagglehub

# Fetch the latest published version of the BTC/USD dataset and keep the
# location kagglehub reports for the downloaded files.
dataset_handle = "prasoonkottarathil/btcinusd"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

In [None]:
import os

# List files inside the dataset folder.
# Use the directory returned by kagglehub.dataset_download() instead of a
# hard-coded cache path: the literal path embeds a dataset version number and
# breaks as soon as a newer version is published or the cache location differs.
folder_path = path

# Named `dataset_files` (not `files`) so the `google.colab.files` module
# imported in the first cell is not shadowed.
dataset_files = os.listdir(folder_path)

print("Files in the folder:")
for file_name in dataset_files:
    print(file_name)


In [None]:
# Load the hourly BTC/USD data from the directory kagglehub downloaded to.
# Building the path from `path` keeps this cell working when the dataset
# version (and therefore the cache directory) changes.
file_path = os.path.join(path, "BTC-Hourly.csv")
df = pd.read_csv(file_path)
# NOTE(review): the windowing and train/test split below treat row order as
# time order — confirm the CSV is sorted oldest-first, and sort by its
# timestamp column here if it is not.

### Step 3: Preprocess Time-Series Data

In [None]:
# Focus on the univariate 'close' column; the double brackets keep it 2-D
# (n_rows, 1), which is the layout MinMaxScaler works on.
data = df[['close']].values

# Normalize the data to [0, 1].
# The scaler is fitted on the leading 80% of rows only, so the min/max of the
# held-out tail never leaks into training. The fraction must stay in sync with
# the 0.8 used in the train-test split cell. Values in the held-out tail may
# therefore fall outside [0, 1]; that is expected.
train_rows = int(0.8 * len(data))
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data[:train_rows])
scaled_data = scaler.transform(data)

# Create sequences (60 timesteps)
def create_sequences(data, sequence_length):
    """Slice a series into fixed-length input windows and next-step targets.

    Args:
        data: 2-D array-like of shape (n_rows, n_columns); only column 0 is used.
        sequence_length: number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_rows - sequence_length, sequence_length)`` and ``y`` has shape
        ``(n_rows - sequence_length,)``; ``y[i]`` is the value immediately
        following window ``X[i]``. When the series is not longer than
        ``sequence_length`` both arrays are empty, and ``X`` still has two
        dimensions so that ``X.shape[1]`` remains valid for callers.

    Raises:
        ValueError: if ``sequence_length`` is negative.
    """
    if sequence_length < 0:
        raise ValueError("sequence_length must be non-negative")

    series = np.asarray(data)[:, 0]
    # Clamp at zero so a too-short series yields empty outputs instead of
    # a negative window count.
    n_windows = max(len(series) - sequence_length, 0)

    X = np.empty((n_windows, sequence_length), dtype=series.dtype)
    for start in range(n_windows):
        X[start] = series[start:start + sequence_length]
    # The target of window `start` is the element right after it.
    y = series[sequence_length:sequence_length + n_windows].copy()
    return X, y

# Each sample is a window of 60 consecutive scaled prices
sequence_length = 60
X, y = create_sequences(scaled_data, sequence_length)

# LSTM layers take 3-D input laid out as [samples, time_steps, features];
# this series has a single feature per time step.
n_samples, n_steps = X.shape
X = X.reshape(n_samples, n_steps, 1)

### Step 4: Train-Test Split

In [None]:
# Split by position (no shuffling): the first 80% of windows are used for
# training and the remaining windows for testing.
train_fraction = 0.8
split = int(train_fraction * len(X))

X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

### Step 5: Build LSTM Model

In [None]:
# Two stacked LSTM layers followed by a single-unit regression head.
# The first LSTM returns its full output sequence so the second LSTM
# receives one vector per time step.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
    LSTM(units=50),
    Dense(units=1),
])

### Step 6: Train the Model

In [None]:
# Compile with MSE loss and fit on the training windows
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
)

# Predict on the held-out windows (outputs are still in scaled units)
predicted_scaled = model.predict(X_test)

# Map both targets and predictions back to the original price scale;
# the scaler works on 2-D input, hence the reshape of the 1-D targets.
actual = scaler.inverse_transform(y_test.reshape(-1, 1))
predicted = scaler.inverse_transform(predicted_scaled)

### Step 7: Evaluate the Model

In [None]:
# Error metrics on the inverse-transformed (price-scale) values
mse = mean_squared_error(actual, predicted)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, predicted)

print(f"RMSE: {rmse:.2f}", f"MAE: {mae:.2f}", sep="\n")




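RMSE and MAE are computed on the inverse-transformed values, so both are expressed in the original price units (USD) rather than in the scaled units the model was trained on.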

### Step 8: Plot actual vs predicted

In [None]:
# Overlay the actual and predicted close prices for the test windows
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(actual, label='Actual BTC Close Price')
ax.plot(predicted, label='Predicted BTC Close Price')
ax.set_title('BTC Price Prediction vs Actual')
ax.set_xlabel('Time')
ax.set_ylabel('Price (USD)')
ax.legend()
ax.grid(True)
plt.show()

## 5.2: Character-Level Text Modelling with an LSTM (tiny_shakespeare)

### Step 1: Import Libraries

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam


### Step 2: Load Dataset

In [None]:
import tensorflow_datasets as tfds

# Load the tiny_shakespeare dataset from TensorFlow Datasets.
# It is loaded without as_supervised, so each example is a feature dict
# and the raw text is read from its 'text' entry.
dataset, info = tfds.load('tiny_shakespeare', with_info=True)
train_data = dataset['train']

# Decode every training example and concatenate the pieces into one string
text = ''.join(
    example['text'].numpy().decode('utf-8') for example in train_data
)

# Normalize: lowercase everything, turn newlines into spaces and drop
# carriage returns
text = text.lower().replace('\n', ' ').replace('\r', '')




### Step 3: Vectorize the data

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order
chars = sorted(set(text))

# Lookup tables in both directions (integer id <-> character)
index_to_char = dict(enumerate(chars))
char_to_index = {char: idx for idx, char in index_to_char.items()}

# Encode the whole corpus as an array of integer character ids
text_as_int = np.array([char_to_index[symbol] for symbol in text])

### Step 4: Prepare Sequences for Training

In [None]:
# Window length (in characters) and training batch size
sequence_length = 100
batch_size = 64

# Slide a window of `sequence_length` ids over the corpus one step at a
# time; the id that follows each window is its prediction target.
n_windows = len(text_as_int) - sequence_length
sequences = [
    text_as_int[start:start + sequence_length] for start in range(n_windows)
]
next_chars = [
    text_as_int[start + sequence_length] for start in range(n_windows)
]

# Stack into arrays: X holds one window per row, y the matching next id
X = np.array(sequences)
y = np.array(next_chars)

### Step 5: Build LSTM Model

In [None]:
# Character-level model: embedding -> two stacked LSTMs -> softmax over the
# vocabulary.
# The explicit Input layer replaces Embedding's `input_length` argument,
# which Keras 3 deprecates and ignores. Declaring the input shape up front
# also builds the model immediately, so model.summary() can report output
# shapes and parameter counts before training.
model = Sequential([
    tf.keras.Input(shape=(sequence_length,)),
    Embedding(input_dim=len(chars), output_dim=256),
    LSTM(512, return_sequences=True),  # full sequence feeds the next LSTM
    Dropout(0.3),
    LSTM(512),
    Dense(len(chars), activation='softmax')
])

# Targets are integer character ids (not one-hot), hence the sparse loss
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
model.summary()


### Step 6: Train the Model

In [None]:
# Train on every window; `history` keeps the per-epoch metrics for plotting
history = model.fit(
    X,
    y,
    batch_size=batch_size,
    epochs=3,
)

### Step 7: Plot Training Accuracy and Loss

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

accuracy_ax.plot(history.history['accuracy'])
accuracy_ax.set_title('Training Accuracy')
accuracy_ax.set_xlabel('Epochs')
accuracy_ax.set_ylabel('Accuracy')

loss_ax.plot(history.history['loss'])
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')

fig.tight_layout()
plt.show()


## 5.3: IMDb Sentiment Classification with an LSTM

### Step 1: Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report, confusion_matrix


### Step 2: Load Dataset

In [None]:
# Settings for the IMDb data
vocab_size = 10000  # Use the top 10,000 most frequent words
maxlen = 200        # Max length of review sequences

# load_data returns a (train, test) pair, each an (inputs, labels) pair
train_split, test_split = imdb.load_data(num_words=vocab_size)
x_train, y_train = train_split
x_test, y_test = test_split

print(f"Training samples: {len(x_train)}")
print(f"Test samples: {len(x_test)}")


In [None]:
# Bring every review to exactly `maxlen` tokens so the inputs form a
# rectangular array
x_train, x_test = (
    pad_sequences(reviews, maxlen=maxlen) for reviews in (x_train, x_test)
)

print(f"Padded x_train shape: {x_train.shape}")
print(f"Padded x_test shape: {x_test.shape}")


### Step 3: Build LSTM Model

In [None]:
from tensorflow.keras.layers import Input

# Define the LSTM model: embedding -> LSTM -> sigmoid output for the
# binary sentiment label.
# The explicit Input layer replaces Embedding's `input_length` argument,
# which Keras 3 deprecates and ignores. Declaring the input shape up front
# also builds the model immediately, so model.summary() can report output
# shapes and parameter counts before training.
model = Sequential()
model.add(Input(shape=(maxlen,)))
model.add(Embedding(input_dim=vocab_size, output_dim=128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model summary
model.summary()




### Step 4: Train the Model

In [None]:
# Fit the classifier, holding out 20% of the training data for validation;
# `history` keeps the per-epoch metrics.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)


### Step 5: Evaluate Model

In [None]:
# Score the trained model on the held-out test set; the result unpacks into
# the loss followed by the accuracy metric the model was compiled with.
evaluation = model.evaluate(x_test, y_test)
score, acc = evaluation
print("Test Accuracy:", acc)


In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class labels
probabilities = model.predict(x_test)
y_pred = (probabilities > 0.5).astype("int32")

# Per-class precision / recall / F1 on the test set
report = classification_report(y_test, y_pred, target_names=["Negative", "Positive"])
print(report)


### Step 6: Confusion matrix

In [None]:
# Confusion matrix of true vs. predicted sentiment, drawn as an annotated heatmap
cm = confusion_matrix(y_test, y_pred)

class_names = ["Negative", "Positive"]
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()
