# **Financial Time-Series Anomaly Detection**


Objective:

The goal of this project is to create a tool that detects anomalies in stock price trends. These anomalies could represent unusual market behavior such as market manipulation, extreme fluctuations, or events that may indicate problems in the market (e.g., sudden price jumps or drops without a valid reason).

The steps below outline how to detect anomalies in price trends and flag possible market manipulation:

# Steps to Achieve Your Goal:

# 1. Data Collection:

- Download historical price data for Bitcoin (or any other cryptocurrency or stock) from Yahoo Finance or another reliable source.

- Ensure that the data includes columns like Date, Open, Close, High, Low, and Volume.

In [None]:
!pip install yfinance panda matplotlib

In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

- Download data

In [None]:
# Download one year of daily BTC-USD candles from Yahoo Finance.
df = yf.download('BTC-USD', period='1y', interval='1d')

# Newer yfinance versions can return two-level (field, ticker) columns even
# for a single ticker, which makes df['Close'] a DataFrame instead of a Series.
# Keep only the field level so the rest of the notebook sees flat column names.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# An empty frame would only fail later with a confusing error, so stop here.
if df.empty:
    raise RuntimeError(
        "No data returned for BTC-USD; check the network connection or ticker symbol."
    )

df.head()

In [None]:
# Turn the current index into a regular column and use a plain 0..n-1 index.
df = df.reset_index()

In [None]:
# Make sure the 'Date' column holds pandas datetimes.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [None]:
# Remove, in place, every row that has a missing value in any column.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line after the report).
print("\nCleaned DataFrame Info:")
df.info()

In [None]:
# Line chart of the closing price against the date.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Date'], df['Close'], label='Bitcoin close price')
ax.set_title('Bitcoin Close Price')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
ax.grid(True)
plt.show()


# Calculate the RSI Indicator




In [None]:
!pip install ta
import ta


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Assume your DataFrame is named df and has 'Date' and 'Close' columns
df['Close'] = df['Close'].astype(float)

# Look-back period (in rows) used for the averages below.
RSI_WINDOW = 14

# Step 1: Calculate the daily price change.
# The first row has no previous day, so its change is NaN.
delta = df['Close'].diff()

# Step 2: Separate gains and losses.
# clip() keeps that leading NaN instead of turning it into a fake "0 change",
# so the first RSI value is averaged over RSI_WINDOW real price changes.
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)

# Step 3: Calculate the average gain and average loss (simple moving average)
avg_gain = gain.rolling(window=RSI_WINDOW).mean()
avg_loss = loss.rolling(window=RSI_WINDOW).mean()

# Step 4: Calculate RS and then RSI.
# When avg_loss is 0 and avg_gain is positive, RS is inf and RSI evaluates to 100.
rs = avg_gain / avg_loss
rsi = 100 - (100 / (1 + rs))

# Add RSI to DataFrame
df['RSI_14'] = rsi

# Drop the warm-up rows that have no RSI yet (and any other rows with NaN)
df.dropna(inplace=True)

# Show final RSI values
print(df[['Date', 'Close', 'RSI_14']].tail())


In [None]:
# RSI over time, with the 70 / 30 reference levels drawn as dashed lines.
fig, ax = plt.subplots(figsize=(14, 4))
ax.plot(df['Date'], df['RSI_14'], label='RSI (14)', color='purple')
ax.axhline(70, color='red', linestyle='--', label='Overbought (70)')
ax.axhline(30, color='green', linestyle='--', label='Oversold (30)')
ax.set_title('RSI - Relative Strength Index')
ax.set_xlabel('Date')
ax.set_ylabel('RSI Value')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


# Anomaly Detection using Isolation Forest

In [None]:
# Install required libraries (if not already installed)
!pip install yfinance pandas_ta scikit-learn matplotlib --quiet


In [None]:

from sklearn.ensemble import IsolationForest

In [None]:


# Feature matrix for anomaly detection: closing price and 14-period RSI.
X = df[['Close', 'RSI_14']]

# Isolation Forest with 100 trees; contamination=0.03 sets the expected share
# of outliers, and random_state fixes the seed.
iso_forest = IsolationForest(
    n_estimators=100,
    contamination=0.03,
    random_state=42,
)

# Fit on X, then label every row: -1 for anomalies, 1 for normal points.
iso_forest.fit(X)
df['anomaly'] = iso_forest.predict(X)

# Boolean flag that is easier to use for filtering and plotting.
df['is_anomaly'] = df['anomaly'].eq(-1)


In [None]:
import matplotlib.pyplot as plt

# Plot Close Price with Anomalies.
# The x-axis uses the 'Date' column (not the integer row index) so that it
# matches the 'Date' axis label.
anomalies = df[df['is_anomaly']]

plt.figure(figsize=(14, 7))
plt.plot(df['Date'], df['Close'], label='Close Price', color='blue')

# Highlight anomalies in red; zorder draws the markers on top of the line
plt.scatter(anomalies['Date'], anomalies['Close'], color='red', label='Anomaly', s=60, zorder=3)

# Add labels and title
plt.title('Bitcoin Close Price with Anomalies (Using Isolation Forest)', fontsize=14)
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()


# Create Model

1. Install Required Libraries

In [None]:
!pip install tensorflow
!pip install scikit-learn


2. Data Preprocessing

You'll need to preprocess the data for LSTM. This includes:
1.   Scaling the data (using MinMaxScaler).
2.   Creating sequences of data (since LSTM works with sequences).



In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [None]:


# 'df' is expected to be the DataFrame of historical prices built above.

# Step 1: Rescale the 'Close' price to the 0-1 range.
# NOTE(review): the scaler is fit on the whole series, including the rows that
# later end up in the test split.
close_prices = df['Close'].to_numpy().reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(close_prices).transform(close_prices)

# Step 2: Create sequences for LSTM
def create_sequences(data, sequence_length):
    """Build sliding-window samples from the first column of *data*.

    Args:
        data: 2-D array-like of shape (n_rows, n_columns); only column 0 is used.
        sequence_length: number of consecutive rows in each input window.

    Returns:
        A tuple ``(x_data, y_data)`` of NumPy arrays. ``x_data`` has shape
        ``(n_rows - sequence_length, sequence_length)`` and holds the windows;
        ``y_data`` has shape ``(n_rows - sequence_length,)`` and holds the value
        that immediately follows each window. When there are not enough rows
        for a single window, both arrays are empty but ``x_data`` keeps its
        second dimension, so a later reshape to 3-D still works.
    """
    series = np.asarray(data)[:, 0]
    n_samples = len(series) - sequence_length

    # Too few rows: return correctly shaped empties instead of a 1-D array.
    if n_samples <= 0:
        return (
            np.empty((0, sequence_length), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    x_data, y_data = [], []
    for i in range(sequence_length, len(series)):
        x_data.append(series[i - sequence_length:i])  # previous `sequence_length` rows
        y_data.append(series[i])  # the value right after the window
    return np.array(x_data), np.array(y_data)

# Each sample is a window of 60 consecutive scaled closes; the target is the
# close that follows the window.
sequence_length = 60
X, y = create_sequences(scaled_data, sequence_length)

# Step 3: Add a trailing feature axis -> (samples, time steps, features)
n_samples, n_steps = X.shape
X = X.reshape(n_samples, n_steps, 1)

# Chronological split: first 80% of the samples for training, the rest for testing
train_size = int(n_samples * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]



# 3. Build the LSTM Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Step 4: Build the LSTM model as a single layer stack
model = Sequential([
    # First LSTM layer: returns the full sequence so the next LSTM can consume it
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    # Second LSTM layer: returns only its final output
    LSTM(units=50, return_sequences=False),
    # Output layer: a single value (the predicted scaled price)
    Dense(units=1),
])

# Adam optimizer with mean-squared-error loss
model.compile(optimizer='adam', loss='mean_squared_error')

# Step 5: Train for 10 epochs in mini-batches of 32 samples
model.fit(X_train, y_train, epochs=10, batch_size=32)



Make Predictions

In [None]:
# Predict on the test windows, then undo the 0-1 scaling so the values are
# back in the original price range.
scaled_predictions = model.predict(X_test)
predictions = scaler.inverse_transform(scaled_predictions)

In [None]:
# Step 7: Compare actual and predicted prices over the test period
n_predictions = len(predictions)
test_dates = df['Date'].iloc[-n_predictions:]
actual_close = df['Close'].iloc[-n_predictions:]

fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(test_dates, actual_close, label='Actual Price', color='blue')
ax.plot(test_dates, predictions, label='Predicted Price', color='red')
ax.set_title('Bitcoin Price Prediction using LSTM')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()

Model Evaluation (Optional)

In [None]:
from sklearn.metrics import mean_squared_error
import math

# Calculate Mean Squared Error in price units.
# y_test is still in the scaler's 0-1 range while `predictions` has already
# been converted back to prices, so bring y_test to the same units first;
# otherwise the error compares two different scales.
y_test_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

mse = mean_squared_error(y_test_prices, predictions)
rmse = math.sqrt(mse)

print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')


Making Future Predictions

In [None]:
# Forecast the next 30 days recursively.
# Start from the most recent `sequence_length` scaled closes, predict one day,
# append that prediction to the window, drop the oldest value and repeat.
# (Re-predicting the last test windows would only reproduce past dates.)
# Errors compound with each step, so later days are less reliable.
forecast_horizon = 30
window = scaled_data[-sequence_length:, 0].copy()
future_scaled = []
for _ in range(forecast_horizon):
    next_scaled = model.predict(window.reshape(1, sequence_length, 1), verbose=0)[0, 0]
    future_scaled.append(next_scaled)
    window = np.append(window[1:], next_scaled)

# Inverse scaling the predictions
future_predictions = scaler.inverse_transform(np.array(future_scaled).reshape(-1, 1))

# Calendar dates that follow the last observed date
future_dates = pd.date_range(
    start=df['Date'].iloc[-1] + pd.Timedelta(days=1),
    periods=forecast_horizon,
    freq='D',
)

# Plot the predictions
plt.figure(figsize=(14, 7))
plt.plot(future_dates, future_predictions, label='Future Predictions', color='green')
plt.title('Bitcoin Price Future Prediction (Next 30 Days)')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()


In [None]:
!pip install joblib
import joblib

# After training
joblib.dump(model, 'isolation_model.pkl')

