In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [4]:
# Root folder holding the sub-folders of CSV files. Set the SPECTRA_DATA_DIR
# environment variable to point at a different location; the original
# hard-coded path is kept as the fallback so existing runs behave the same.
root_folder_path = os.environ.get('SPECTRA_DATA_DIR', 'C:\\Users\\Jiahe Liu\\CNN\\Data')

# Start from an empty frame that only declares the spectrum column
result_df = pd.DataFrame(columns=['IR intensity'])

In [5]:
# Walk every sub-folder of the root and collect one record per CSV file.
# Records are gathered in a plain list and converted to a DataFrame once,
# rather than growing result_df inside the loop through the private
# DataFrame._append method.
spectrum_records = []
for folder_name in sorted(os.listdir(root_folder_path)):  # sorted: deterministic row order
    folder_path = os.path.join(root_folder_path, folder_name)

    # Only directories are scanned; loose files at the root level are skipped
    if not os.path.isdir(folder_path):
        continue

    for csv_file in sorted(os.listdir(folder_path)):
        # Skip non-CSV entries so they cannot leave empty (NaN) rows behind
        if not csv_file.endswith('.csv'):
            continue
        print(f"Processing CSV file: {csv_file}")
        # Files are read without a header row, starting from the first line
        df = pd.read_csv(os.path.join(folder_path, csv_file), skiprows=0, header=None)
        spectrum_records.append({
            # Whole second column stored as one Python list in a single cell
            'IR intensity': df.iloc[:, 1].values.tolist(),
            # The name of the containing folder is used as the label
            'Label': folder_name,
        })

# Build the frame in a single step; rebuilding it from scratch also means that
# re-running this cell does not duplicate rows.
result_df = pd.DataFrame(spectrum_records, columns=['IR intensity', 'Label'])

Processing CSV file: 0.csv
Processing CSV file: 1.csv
Processing CSV file: 10.csv
Processing CSV file: 100.csv
Processing CSV file: 101.csv
Processing CSV file: 102.csv
Processing CSV file: 103.csv
Processing CSV file: 104.csv
Processing CSV file: 105.csv
Processing CSV file: 106.csv
Processing CSV file: 107.csv
Processing CSV file: 108.csv
Processing CSV file: 109.csv
Processing CSV file: 11.csv
Processing CSV file: 110.csv
Processing CSV file: 111.csv
Processing CSV file: 112.csv
Processing CSV file: 113.csv
Processing CSV file: 114.csv
Processing CSV file: 115.csv
Processing CSV file: 116.csv
Processing CSV file: 117.csv
Processing CSV file: 118.csv
Processing CSV file: 119.csv
Processing CSV file: 12.csv
Processing CSV file: 120.csv
Processing CSV file: 121.csv
Processing CSV file: 122.csv
Processing CSV file: 123.csv
Processing CSV file: 124.csv
Processing CSV file: 125.csv
Processing CSV file: 126.csv
Processing CSV file: 127.csv
Processing CSV file: 128.csv
Processing CSV file: 

In [6]:
# Separate the loaded frame into features (everything but 'Label') and targets
x = result_df.drop(columns=['Label'])
y = result_df['Label']

In [7]:
# Labels as a float Series, and the feature cells flattened into one Python list
y_list = result_df['Label'].astype('float64')
x_list = x.to_numpy().ravel().tolist()

In [8]:
class SpectraCNN(nn.Module):
    """1-D convolutional classifier for single-channel spectra.

    Architecture: two Conv1d (kernel size 5) + ReLU + MaxPool1d(2) stages,
    followed by a 128-unit hidden linear layer and a linear output layer.
    ``forward`` returns raw logits (softmax is not applied), which is the
    input that ``nn.CrossEntropyLoss`` expects.

    Args:
        num_classes: Number of output logits.
        input_length: Number of points per input spectrum. The default of
            4000 yields the original flattened size of 64 * 997.

    Raises:
        ValueError: If ``input_length`` is too short to survive both
            convolution/pooling stages.
    """

    def __init__(self, num_classes=5, input_length=4000):
        super(SpectraCNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5)
        self.pool = nn.MaxPool1d(kernel_size=2)

        # Each unpadded kernel-5 convolution shortens the signal by 4 and each
        # pooling step halves it (floor): 4000 -> 3996 -> 1998 -> 1994 -> 997.
        pooled_length = ((input_length - 4) // 2 - 4) // 2
        if pooled_length < 1:
            raise ValueError(
                f"input_length={input_length} is too short for two conv/pool stages"
            )
        self.flat_features = 64 * pooled_length

        self.fc1 = nn.Linear(self.flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)
        # Not used in forward(); kept so the attribute remains available
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits for a batch shaped (batch, 1, input_length)."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch axis. Unlike view(-1, N), this can
        # never fold samples into the batch dimension: a wrong input length
        # fails loudly in fc1 instead.
        x = x.flatten(start_dim=1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

model = SpectraCNN()

In [9]:
# Convert the nested Python list of spectra into one float32 tensor
x_train = torch.as_tensor(x_list, dtype=torch.float32)

In [10]:
# Build the label tensor from the Series' underlying NumPy array rather than
# handing the pandas object itself to torch.tensor. The values are cast to
# integers first and then to float32, exactly as before.
y_train = torch.tensor(y_list.to_numpy(), dtype=torch.long).float()

  y_train = torch.tensor(y_list, dtype=torch.long)


In [11]:
# Insert a size-1 axis at position 1 (the channel axis that Conv1d reads)
tensor_data = x_train[:, None]

In [12]:
# Expose the label tensor under the name used when building the dataset
y_labels = y_train

In [13]:
# Prefer CUDA when it is available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Place the model on the selected device
model = model.to(device)

In [14]:
# Train the model on the whole dataset and print per-epoch statistics.
# Assumes tensor_data is (num_spectra, 1, num_points) and y_labels holds
# 1-based class labels (hence the "- 1" below) — TODO confirm against the data.
spectra_dataset = TensorDataset(tensor_data, y_labels)
batch_size = 128  # Adjust batch size according to your preference

# Create DataLoader for batch training
dataloader = DataLoader(spectra_dataset, batch_size=batch_size, shuffle=True)
# Actual dataset size, so the reported accuracy does not rely on a fixed 5000
num_samples = len(spectra_dataset)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): the saved run stays at about ln(5) loss per batch and 20 %
# accuracy after the first epoch, i.e. chance level for 5 classes. Input
# scaling and the learning rate are worth checking; nothing is changed here.

# Training loop
num_epochs = 300  # Adjust number of epochs based on your preference
for epoch in range(num_epochs):
    running_loss = 0  # loss summed over the batches of this epoch
    true_num = 0      # correctly classified samples in this epoch
    for inputs, labels in dataloader:
        optimizer.zero_grad()

        # Move the batch to the model's device
        input_cuda = inputs.to(device)
        # Shift labels to the 0-based indices CrossEntropyLoss requires.
        # .long() casts the existing tensor; re-wrapping it in torch.tensor()
        # raises a copy-construct UserWarning on every batch.
        labels_cuda = (labels.long() - 1).to(device)

        # Forward pass and loss
        outputs = model(input_cuda)
        loss = criterion(outputs, labels_cuda)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Count correct predictions with a tensor reduction on the device
        true_num += (torch.argmax(outputs, dim=1) == labels_cuda).sum().item()
    # Columns: epoch index, summed loss, training accuracy
    print(epoch,'  ',running_loss, "   ", true_num/num_samples)

  labels_cuda = (torch.tensor(labels, dtype=torch.long)-1).to(device)


0    4527.518334388733     0.2748
1    64.41870558261871     0.2
2    64.40575790405273     0.2
3    64.40945661067963     0.2
4    64.37967646121979     0.2
5    64.39167618751526     0.2
6    64.39253401756287     0.2
7    64.3819899559021     0.2
8    64.38466274738312     0.2
9    64.37979483604431     0.2
10    64.3822820186615     0.2
11    64.3802717924118     0.1924
12    64.37782979011536     0.1994
13    64.38015604019165     0.2
14    64.37858486175537     0.2
15    64.38186931610107     0.2006
16    64.37853968143463     0.2
17    64.37908399105072     0.1976
18    64.38123822212219     0.1968
19    64.38519990444183     0.2
20    64.37859690189362     0.1938
21    64.38012945652008     0.1908
22    64.38266587257385     0.1972
23    64.38089203834534     0.195
24    64.38175654411316     0.191
25    64.37926733493805     0.1908
26    64.38360702991486     0.2
27    64.38149452209473     0.1956
28    64.38163912296295     0.1896
29    64.37982439994812     0.1942
30    64.3