<a href="https://colab.research.google.com/github/Vaibhav807/Artificial-Intelligence-Projects/blob/main/Image_Creation_File.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Final**

In [1]:
# Extract the e-nose dataset archive into the current working directory;
# per the listing below this creates dataset/ with train/ and test/ subfolders.
!unzip enose_dataset.zip

Archive:  enose_dataset.zip
   creating: dataset/
  inflating: dataset/submission_example.csv  
  inflating: dataset/.DS_Store       
  inflating: __MACOSX/dataset/._.DS_Store  
   creating: dataset/test/
   creating: dataset/train/
  inflating: dataset/train_test_split_order.csv  
  inflating: __MACOSX/dataset/._train_test_split_order.csv  
  inflating: dataset/test/NTL E-Nose - Patient 34.txt  
  inflating: __MACOSX/dataset/test/._NTL E-Nose - Patient 34.txt  
  inflating: dataset/test/NTL E-Nose - Patient 23.txt  
  inflating: __MACOSX/dataset/test/._NTL E-Nose - Patient 23.txt  
  inflating: dataset/test/NTL E-Nose - Patient 33.txt  
  inflating: __MACOSX/dataset/test/._NTL E-Nose - Patient 33.txt  
  inflating: dataset/test/NTL E-Nose - Patient 32.txt  
  inflating: __MACOSX/dataset/test/._NTL E-Nose - Patient 32.txt  
  inflating: dataset/test/NTL E-Nose - Patient 24.txt  
  inflating: __MACOSX/dataset/test/._NTL E-Nose - Patient 24.txt  
  inflating: dataset/test/NTL E-Nose - Pa

In [2]:
import os
import pandas as pd

def convert_time_to_seconds(time_str):
    """Convert a ``minutes:seconds`` timestamp string into seconds.

    Args:
        time_str: Text containing exactly one ':' that separates the minutes
            part from the seconds part; each part is parsed with ``float``.

    Returns:
        float: ``minutes * 60 + seconds``.

    Raises:
        ValueError: If the text does not split into exactly two numeric parts.
    """
    minutes, seconds = map(float, time_str.split(':'))
    return minutes * 60 + seconds


def process_file(file_path):
    """Parse one recording file into its ID, label and standardized readings.

    The file layout this function relies on is:
      * line 1: ``<key>: <integer id>``
      * line 2: ``<key>: <result label>``
      * lines 3-4: skipped (blank line and column header)
      * remaining lines: tab-separated rows, a ``minutes:seconds`` timestamp
        followed by numeric readings.

    Timestamps are converted to elapsed seconds since the first data row.
    When a timestamp is smaller than the one before it, the clock is assumed
    to have wrapped around after one hour (3600 s), so the step is
    ``current - previous + 3600``.

    Every reading column (all columns except elapsed time) is standardized
    to zero mean and unit standard deviation using pandas' default ``std``.
    A column with no spread therefore comes out as NaN.

    Args:
        file_path: Path of the text file to read.

    Returns:
        tuple: ``(id_value, result_value, numerical_data)`` where ``id_value``
        is an int, ``result_value`` is the label string, and
        ``numerical_data`` is a list of rows ``[elapsed_seconds, v1, v2, ...]``.
        ``numerical_data`` is an empty list when the file has no data rows.

    Raises:
        IndexError: If the ID or result line is missing or has no ':'.
        ValueError: If the ID, a timestamp or a reading is not numeric.
    """
    # Period after which the minutes:seconds clock is assumed to restart.
    clock_period_seconds = 3600

    with open(file_path, 'r') as file:
        lines = file.readlines()

    id_line = lines[0].strip()
    result_line = lines[1].strip()

    id_value = int(id_line.split(':')[1].strip())
    result_value = result_line.split(':')[1].strip()

    # Skip the ID line, the Result line, the blank line and the header line.
    cleaned_lines = lines[4:]

    data = []
    cumulative_time = 0
    previous_time_value = None  # no timestamp seen yet

    for line in cleaned_lines:
        stripped = line.strip()
        if not stripped:
            # Blank (e.g. trailing) lines carry no sample; parsing them
            # would fail on float('').
            continue

        parts = stripped.split('\t')
        current_time_value = convert_time_to_seconds(parts[0])

        if previous_time_value is None:
            # The first sample defines time zero.
            time_in_seconds = 0
        elif current_time_value < previous_time_value:
            # Clock wrapped: the true step is the distance to the end of the
            # period plus the distance from its start. (The previous formula,
            # abs(diff) - 3600, produced a negative step here.)
            time_in_seconds = (
                current_time_value - previous_time_value + clock_period_seconds
            )
        else:
            time_in_seconds = current_time_value - previous_time_value

        cumulative_time += time_in_seconds
        previous_time_value = current_time_value
        numerical_values = list(map(float, parts[1:]))
        data.append([cumulative_time] + numerical_values)

    if not data:
        # Nothing to standardize; avoid operating on an empty frame.
        return id_value, result_value, []

    # Standardize every reading column; column 0 (elapsed time) is left as is.
    df = pd.DataFrame(data)
    df.iloc[:, 1:] = df.iloc[:, 1:].apply(lambda x: (x - x.mean()) / x.std())
    numerical_data = df.values.tolist()

    return id_value, result_value, numerical_data


In [3]:
def process_directory(directory_path):
    """Process every recording file in a directory, grouped by result label.

    Each regular, non-hidden file directly inside ``directory_path`` is passed
    to ``process_file``. A file whose result label equals "POSITIVE"
    (case-insensitive) goes into the positive group; every other label goes
    into the negative group.

    Hidden entries (names starting with '.', such as the ``.DS_Store`` and
    ``._*`` files macOS adds to archives) and subdirectories are skipped
    because they are not recordings and cannot be parsed.

    Args:
        directory_path: Directory containing the recording files.

    Returns:
        tuple: ``(positive_ids, positive_results, positive_data,
        negative_ids, negative_results, negative_data)``. Within each group
        the three lists are index-aligned, in ``os.listdir`` order.
    """
    positive_ids = []
    positive_results = []
    positive_data = []

    negative_ids = []
    negative_results = []
    negative_data = []

    for file_name in os.listdir(directory_path):
        file_path = os.path.join(directory_path, file_name)

        # Ignore OS metadata files and anything that is not a plain file.
        if file_name.startswith('.') or not os.path.isfile(file_path):
            continue

        id_value, result_value, numerical_data = process_file(file_path)
        if result_value.upper() == "POSITIVE":
            positive_ids.append(id_value)
            positive_results.append(result_value)
            positive_data.append(numerical_data)
        else:
            negative_ids.append(id_value)
            negative_results.append(result_value)
            negative_data.append(numerical_data)

    return positive_ids, positive_results, positive_data, negative_ids, negative_results, negative_data

In [4]:
def get_data_by_id(file_id, positive_ids, positive_data, negative_ids, negative_data):
    """Look up the data rows recorded for ``file_id``.

    The positive group is searched first, then the negative group. Each ID
    list is expected to be index-aligned with its data list.

    Returns:
        The data entry at the position of the first matching ID, or ``None``
        when the ID appears in neither group.
    """
    groups = (
        (positive_ids, positive_data),
        (negative_ids, negative_data),
    )
    for known_ids, group_data in groups:
        if file_id in known_ids:
            return group_data[known_ids.index(file_id)]
    return None

In [5]:
import matplotlib.pyplot as plt

def plot_data(data_for_id, file_id, cases):
    """Plot each reading column against time on an 8x8 grid and save a PNG.

    Column ``i`` of the readings is drawn in panel ``D{i+1}``. Dashed vertical
    lines mark the first sample whose time reaches each of the fixed
    thresholds (300, 360, 480, 540, 660, 720 and 840 seconds). At most 64
    columns are drawn; when fewer are available the unused panels are hidden
    instead of raising ``IndexError``.

    The figure is written to ``{file_id}_Graph.png`` in the current working
    directory and is always closed, even if plotting fails part-way.

    Args:
        data_for_id: Rows of ``[time, v1, v2, ...]``.
        file_id: Identifier used in the progress message, title and file name.
        cases: Label appended to the figure title (e.g. "NEGATIVE").
    """
    print(f"WORKING ON {file_id} GRAPH")
    time_values = [row[0] for row in data_for_id]
    data_matrix = [row[1:] for row in data_for_id]
    data_matrix = list(zip(*data_matrix))  # Transpose rows into per-column series

    # Times (s) to mark, with the line color used for each.
    # NOTE(review): the meaning of each threshold/color is not documented here.
    thresholds = [300, 360, 480, 540, 660, 720, 840]
    threshold_colors = {300: 'black', 360: 'blue', 480: 'red', 540: 'blue', 660: 'red', 720: 'blue', 840: 'blue'}
    threshold_lines = {}

    # Record, per threshold, the first sample time that reaches it.
    for time in time_values:
        for threshold in thresholds:
            if threshold not in threshold_lines and time >= threshold:
                threshold_lines[threshold] = time

    fig, axes = plt.subplots(8, 8, figsize=(20, 20))
    try:
        # Add a big title at the top
        fig.suptitle(f'ID {file_id} - {cases}', fontsize=20)

        for i, ax in enumerate(axes.flat):
            if i >= len(data_matrix):
                # No series for this panel: hide it rather than index past
                # the end of the data.
                ax.set_visible(False)
                continue

            ax.plot(time_values, data_matrix[i])
            for threshold, time in threshold_lines.items():
                ax.axvline(x=time, color=threshold_colors[threshold], linestyle='--', linewidth=0.8)
            ax.set_title(f'D{i+1}', fontsize=10)
            ax.set_xlabel('Time (s)', fontsize=8)
            ax.set_ylabel('Value', fontsize=8)

        # Leave room above the grid for the suptitle.
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])

        # Save the plot to a file
        fig.savefig(f'{file_id}_Graph.png')
    finally:
        # Release the figure even on failure; this runs in a loop over many
        # IDs and unclosed figures accumulate in memory.
        plt.close(fig)


In [6]:
import pandas as pd
import numpy as np
# Training recordings, relative to the working directory into which the
# archive was unzipped (avoids hard-coding the Colab-specific /content prefix).
directory_path = os.path.join('dataset', 'train')

# Parse every training file once and split the results by label.
positive_ids, positive_results, positive_data, negative_ids, negative_results, negative_data = process_directory(directory_path)

# Print out the IDs for positive cases as a list
print("Positive Case IDs:")
print(positive_ids)

# Print out the IDs for negative cases as a list
print("\nNegative Case IDs:")
print(negative_ids)

Positive Case IDs:
[35, 29, 30, 22, 45, 31, 63, 38, 21, 1, 44, 28, 40, 60, 61, 12, 36, 13, 47]

Negative Case IDs:
[5, 6, 20, 53, 16, 17, 7, 41, 55, 10, 46, 18, 27, 26, 4, 62, 42, 39, 50, 58, 19, 8, 57, 56, 37, 9]


In [7]:
# Render and save one grid figure per negative-case recording.
for file_id_to_search in negative_ids:
    data_for_id = get_data_by_id(
        file_id_to_search,
        positive_ids,
        positive_data,
        negative_ids,
        negative_data,
    )

    # Nothing to draw when the lookup fails or the recording has no rows.
    if not data_for_id:
        continue

    plot_data(data_for_id, file_id_to_search, "NEGATIVE")

WORKING ON 5 GRAPH
WORKING ON 6 GRAPH
WORKING ON 20 GRAPH
WORKING ON 53 GRAPH
WORKING ON 16 GRAPH
WORKING ON 17 GRAPH
WORKING ON 7 GRAPH
WORKING ON 41 GRAPH
WORKING ON 55 GRAPH
WORKING ON 10 GRAPH
WORKING ON 46 GRAPH
WORKING ON 18 GRAPH
WORKING ON 27 GRAPH
WORKING ON 26 GRAPH
WORKING ON 4 GRAPH
WORKING ON 62 GRAPH
WORKING ON 42 GRAPH
WORKING ON 39 GRAPH
WORKING ON 50 GRAPH
WORKING ON 58 GRAPH
WORKING ON 19 GRAPH
WORKING ON 8 GRAPH
WORKING ON 57 GRAPH
WORKING ON 56 GRAPH
WORKING ON 37 GRAPH
WORKING ON 9 GRAPH
