In [None]:
# Colab-specific upload step. The `uploaded` mapping returned here is indexed by
# file name in the next cell, and the value stored under that name is wrapped in
# io.BytesIO and handed to pandas.
from google.colab import files
uploaded = files.upload()

Saving data.csv to data.csv


In [None]:
import io
import pandas as pd
# Pick the uploaded payload to parse. 'data.csv' is still preferred, but a bare
# uploaded['data.csv'] lookup fails with an unhelpful KeyError as soon as the
# file was uploaded under any other name, so fall back to the single uploaded
# file when there is exactly one.
if 'data.csv' in uploaded:
    uploaded_name = 'data.csv'
elif len(uploaded) == 1:
    uploaded_name = next(iter(uploaded))
else:
    raise KeyError(
        "Expected an upload named 'data.csv' or exactly one uploaded file, "
        "got: {}".format(sorted(uploaded))
    )

# The upload mapping holds raw bytes, so wrap them in a file-like object for pandas.
data = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
# Dataset is now stored in a Pandas Dataframe

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models

# `data` was already read from the uploaded CSV in an earlier cell, so no file is
# loaded here (the original placeholder was: pd.read_csv('your_data_file.csv')).

# Target column: 'FLAG' (fraudulent or normal, per the original notebook comment).
labels = data['FLAG']

# Model inputs: everything except the 'CONS_NO' and 'FLAG' columns, with every
# column coerced to numeric. Values that cannot be parsed as numbers become NaN.
input_features = (
    data
    .drop(columns=['CONS_NO', 'FLAG'])
    .apply(pd.to_numeric, errors='coerce')
)

# Interpolate missing values using the specified method
def interpolate_missing_values(series):
    """Fill NaN entries of a numeric Series from their immediate neighbours.

    Rule applied to every missing position, always judged against the
    *original* values:

    * both neighbours present -> mean of the two neighbours
    * otherwise (a neighbour is missing, or the value sits at either end of
      the series and has only one neighbour) -> 0

    Differences from the previous loop-based version:

    * the first and last elements are now handled, so no NaN is left behind;
    * a value written for one gap position is no longer treated as a valid
      neighbour for the next one (e.g. ``[3, NaN, NaN, 6]`` becomes
      ``[3, 0, 0, 6]`` rather than ``[3, 0, 3, 6]``);
    * positions are used instead of index labels, so the Series may carry
      any index;
    * the input is not modified; a new Series is returned.

    Parameters
    ----------
    series : pandas.Series
        Numeric values, with missing entries encoded as NaN.

    Returns
    -------
    pandas.Series
        Same index and name as ``series``, with every NaN replaced.
    """
    values = series.to_numpy(dtype=float, copy=True)
    missing = np.isnan(values)
    if not missing.any():
        # Nothing to fill: hand back an untouched copy (keeps the original dtype).
        return series.copy()

    # Neighbour arrays shifted by one position; the ends have no neighbour on
    # one side, which is represented as NaN so they fall into the "-> 0" branch.
    previous = np.full(values.shape, np.nan)
    previous[1:] = values[:-1]
    following = np.full(values.shape, np.nan)
    following[:-1] = values[1:]

    # NaN wherever at least one neighbour is missing or absent.
    neighbour_mean = (previous + following) / 2
    replacement = np.where(np.isnan(neighbour_mean), 0.0, neighbour_mean)

    values[missing] = replacement[missing]
    return pd.Series(values, index=series.index, name=series.name)

# Run the interpolation once per column (axis=0), i.e. neighbours are the rows
# directly above and below within the same column.
# NOTE(review): if each row is one consumer and the columns are consecutive
# readings, the neighbours that matter would be along the row (axis=1) — confirm
# against the dataset layout before relying on these filled values.
input_features = input_features.apply(interpolate_missing_values, axis=0)

In [None]:
# Per-column mean and standard deviation of the input features; both results are
# indexed by column name and are consumed by the outlier-capping step.
average_values = input_features.mean(axis=0)
std_values = input_features.std(axis=0)

# Apply outlier correction using the three-sigma rule to input features
def correct_outliers(series, avg, std):
    """Cap values that lie above ``avg + 2 * std``.

    Every element strictly greater than ``avg + 2 * std`` is replaced by that
    limit; all other elements, including NaN, are returned unchanged. Only the
    upper side is capped. If the limit itself is NaN, nothing is changed.

    The comparison is vectorised and positional, so the Series may carry any
    index, and the input is not modified: a new Series is returned.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to cap.
    avg : float
        Mean used as the centre of the limit.
    std : float
        Standard deviation used to scale the limit.

    Returns
    -------
    pandas.Series
        Capped copy of ``series`` with the same index.
    """
    upper_limit = avg + 2 * std
    # Comparisons against NaN are False, so NaN entries (and a NaN limit)
    # leave the data untouched, matching the element-wise `>` test.
    return series.mask(series > upper_limit, upper_limit)

# Cap each feature column using that column's own mean and standard deviation.
for column in input_features.columns:
    column_mean = average_values[column]
    column_std = std_values[column]
    input_features[column] = correct_outliers(input_features[column], column_mean, column_std)

# Re-attach the labels as an extra column next to the processed features.
preprocessed_data = pd.concat([input_features, labels], axis="columns")

In [None]:
# Two-step stratified split with a fixed seed:
#   1. hold out 30% of the rows (stratified on the labels), leaving 70% for training;
#   2. cut that hold-out in half (stratified again) into validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    input_features,
    labels,
    test_size=0.3,
    stratify=labels,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)

In [1]:
# Define the CNN model
def create_cnn_model(input_shape=None):
    """Build the (uncompiled) five-stage 2-D convolutional classifier.

    Architecture: five ``Conv2D(3x3, relu) -> MaxPooling2D(2x2)`` stages with
    32, 64, 128, 256 and 512 filters, followed by ``Flatten``, a 256-unit
    relu ``Dense`` layer and a 2-unit softmax output (theft or normal).

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of ONE sample as ``(height, width, channels)``, without the
        batch dimension. When omitted, the module-level names ``height``,
        ``width`` and ``channels`` are used, which keeps existing
        ``create_cnn_model()`` calls working but requires those globals to be
        defined before the call.

    Returns
    -------
    tensorflow.keras.Sequential
        The assembled model; the caller is responsible for compiling it.

    Notes
    -----
    The convolutions use the layer's default padding and each stage shrinks a
    spatial size ``s`` to ``(s - 2) // 2``, so both ``height`` and ``width``
    must be at least 94 for all five stages to produce a non-empty output.
    The model consumes 4-D batches ``(batch, height, width, channels)``.
    """
    if input_shape is None:
        # Backward-compatible fallback to the notebook-level globals.
        input_shape = (height, width, channels)

    model = models.Sequential()

    # Convolutional feature extractor: the filter count doubles at every stage.
    for stage, filters in enumerate((32, 64, 128, 256, 512)):
        if stage == 0:
            # Only the first layer declares the per-sample input shape.
            model.add(layers.Conv2D(filters, (3, 3), activation='relu', input_shape=input_shape))
        else:
            model.add(layers.Conv2D(filters, (3, 3), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))

    # Flatten the output for fully connected layers
    model.add(layers.Flatten())

    # Fully connected layer
    model.add(layers.Dense(256, activation='relu'))

    # Output layer with softmax activation for two-class classification (theft or normal)
    model.add(layers.Dense(2, activation='softmax'))

    return model

In [None]:
# Input dimensions picked up by create_cnn_model() via the names height / width / channels.
#
# The original author hit a runtime problem and therefore hard-coded the value
# they reported for X_train.shape, (29660, 1034), instead of reading it from the
# data. Their intended dynamic form (to be corrected before use) was:
#   height, width, channels = X_train.shape[0], X_train.shape[1], 1
# and the shape can be checked with print(X_train.shape).
#
# NOTE(review): 29660 is the number of training rows, so `height` here is a
# sample count rather than a per-sample dimension. The model is declared with
# one sample = (29660, 1034, 1), which does not match feeding X_train row by
# row — TODO decide on a per-sample shape before training.

from tensorflow.keras import layers, models

height = 29660
width = 1034
channels = 1  # Assuming 1 channel (energy consumption)

# Build the network from the dimensions above.
cnn_model = create_cnn_model()

# Adam optimiser; sparse categorical cross-entropy takes integer class labels
# against the two-unit softmax output; accuracy is tracked as the metric.
cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary.
cnn_model.summary()

In [None]:
# Fit for 10 epochs in mini-batches of 32, monitoring the validation split after
# every epoch. The returned History object feeds the plots further down.
# NOTE(review): X_train / X_val / X_test come straight from train_test_split on
# the 2-D feature table, while cnn_model is built around a
# (height, width, channels) input — confirm the arrays are reshaped to what the
# model expects before running this cell.
history = cnn_model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val, y_val),
)

# Score the held-out test split silently and report only the accuracy.
test_loss, test_accuracy = cnn_model.evaluate(
    X_test,
    y_test,
    verbose=0,
)
print("Test Accuracy:", test_accuracy)

In [None]:
# Draw one figure per tracked metric, each with the training curve and the
# validation curve plotted against the epoch number.
import matplotlib.pyplot as plt

# (training key, validation key, figure title, y-axis label)
curve_specs = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)

for train_key, val_key, figure_title, y_label in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
