In [None]:
import os
import pandas as pd

# Directory that holds the CSV files to load.
folder_path = r'XXXXXXXXXXX'

# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# listing is sorted to keep the df1, df2, ... numbering stable between runs and
# machines. Other cells address frames by number ('df3', 'df7', csv_files[2])
# and silently pick the wrong file if the order shifts.
# NOTE(review): confirm that 'df3' and 'df7' still refer to the intended files
# under sorted order.
files = sorted(os.listdir(folder_path))

# Keep only CSV files (case-sensitive match on the '.csv' suffix).
csv_files = [file for file in files if file.endswith('.csv')]

# Maps 'df1', 'df2', ... to the DataFrame read from the matching csv_files entry.
dataframes = {}

# Load every CSV; numbering starts at 1, so 'df<i>' comes from csv_files[i - 1].
for i, csv_file in enumerate(csv_files, start=1):
    file_path = os.path.join(folder_path, csv_file)
    data = pd.read_csv(file_path)

    df_name = f'df{i}'
    dataframes[df_name] = data

    # Report which file ended up under which name.
    print(f'DataFrame Name: {df_name}')
    print(f'File Name: {csv_file}')
    print()  # Blank line between entries

# Show the first rows of every frame as a quick sanity check of the load.
for df_name, df in dataframes.items():
    print(f'DataFrame: {df_name}')
    print(df.head())
    print()  # Blank line between entries


--------------------------------------------------





# Relabel df3's columns with the column names of df7, then write df3 back over
# the CSV file it was loaded from.
# NOTE(review): this only renames df3's existing column labels; whether df3's
# file had a header row of its own is not visible here — confirm no data row
# is being discarded.
header_source = dataframes['df7']
df7_headers = header_source.columns.tolist()

df3_frame = dataframes['df3']
df3_frame.columns = df7_headers

# Frames are numbered from 1, so df3 was read from the third entry of csv_files.
df3_file_name = csv_files[2]
df3_file_path = os.path.join(folder_path, df3_file_name)

# Overwrites the source file in place (row index is not written).
df3_frame.to_csv(df3_file_path, index=False)

# Show what was written.
print(f'Rewritten DataFrame: df3 to file {df3_file_name}')
print(df3_frame.head())


--------------------------------------------------

# Stack every loaded frame into one DataFrame, tagging each row with an
# 'Attack/Normal' label. Rows from df7 are labelled "Normal"; rows from every
# other frame are labelled "Attack".

# Labelled copies, in the iteration order of `dataframes`.
all_data = []

for df_name, df in dataframes.items():
    label = 'Normal' if df_name == 'df7' else 'Attack'

    # assign() returns a new frame, so the entries of `dataframes` stay untouched.
    df_copy = df.assign(**{'Attack/Normal': label})
    all_data.append(df_copy)

# Row-wise concatenation with a fresh 0..n-1 index.
combined_df = pd.concat(all_data, ignore_index=True)

# Quick check: first rows and the number of rows per label.
print(combined_df.head())
print(combined_df['Attack/Normal'].value_counts())




--------------------------------------------------

# Replace the textual 'Attack/Normal' label with a numeric
# 'Attack/Normal Binary' column: 'Attack' -> 0, 'Normal' -> 1.
label_to_binary = {'Attack': 0, 'Normal': 1}

combined_df = (
    combined_df
    .assign(**{'Attack/Normal Binary': combined_df['Attack/Normal'].map(label_to_binary)})
    .drop('Attack/Normal', axis=1)  # the text label is no longer needed
)

# Show the result.
print(combined_df.head())


--------------------------------------------------

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Train and compare three binary classifiers (decision tree, SVM, logistic
# regression) on combined_df, using 'Attack/Normal Binary' as the target.

# Every column other than the target is used as a feature.
# NOTE(review): StandardScaler below needs numeric input — confirm that
# combined_df has no text columns left at this point.
X = combined_df.drop(['Attack/Normal Binary'], axis=1)
y = combined_df['Attack/Normal Binary']

# 70/30 split; stratifying keeps the class proportions equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training part only and reuse it for the test part, so
# no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Decision tree. The seed makes the reported accuracy reproducible between runs
# and matches the seeded tree used in the multi-class comparison.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_scaled, y_train)
dt_y_pred = dt_model.predict(X_test_scaled)
dt_accuracy = accuracy_score(y_test, dt_y_pred)

# Support vector machine with scikit-learn's default settings.
svm_model = SVC()
svm_model.fit(X_train_scaled, y_train)
svm_y_pred = svm_model.predict(X_test_scaled)
svm_accuracy = accuracy_score(y_test, svm_y_pred)

# Logistic regression.
lr_model = LogisticRegression(max_iter=100)
lr_model.fit(X_train_scaled, y_train)
lr_y_pred = lr_model.predict(X_test_scaled)
lr_accuracy = accuracy_score(y_test, lr_y_pred)

# Compare model performances
print("Decision Tree Accuracy:", dt_accuracy)
print("SVM Accuracy:", svm_accuracy)
print("Logistic Regression Accuracy:", lr_accuracy)


--------------------------------------------------

import os
import pandas as pd

# Directory that holds the CSV files to load.
folder_path = r'XXXXXXXXXXX'

# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# listing is sorted to keep the df1, df2, ... numbering stable between runs and
# machines. The code below addresses frames by number ('df3', 'df7',
# csv_files[2]) and silently picks the wrong file if the order shifts.
# NOTE(review): confirm that 'df3' and 'df7' still refer to the intended files
# under sorted order.
files = sorted(os.listdir(folder_path))

# Keep only CSV files (case-sensitive match on the '.csv' suffix).
csv_files = [file for file in files if file.endswith('.csv')]

# Maps 'df1', 'df2', ... to the DataFrame read from the matching csv_files entry.
dataframes = {}

# Load every CSV; numbering starts at 1, so 'df<i>' comes from csv_files[i - 1].
for i, csv_file in enumerate(csv_files, start=1):
    file_path = os.path.join(folder_path, csv_file)
    data = pd.read_csv(file_path)

    df_name = f'df{i}'
    dataframes[df_name] = data

    # Report which file ended up under which name.
    print(f'DataFrame Name: {df_name}')
    print(f'File Name: {csv_file}')
    print()  # Blank line between entries

# Relabel df3's columns with the column names of df7, then write df3 back over
# the CSV file it was loaded from.
# NOTE(review): this only renames df3's existing column labels; whether df3's
# file had a header row of its own is not visible here — confirm no data row
# is being discarded.
header_source = dataframes['df7']
df7_headers = header_source.columns.tolist()

df3_frame = dataframes['df3']
df3_frame.columns = df7_headers

# Frames are numbered from 1, so df3 was read from the third entry of csv_files.
df3_file_name = csv_files[2]
df3_file_path = os.path.join(folder_path, df3_file_name)

# Overwrites the source file in place (row index is not written).
df3_frame.to_csv(df3_file_path, index=False)

# Show what was written.
print(f'Rewritten DataFrame: df3 to file {df3_file_name}')
print(df3_frame.head())

# Function to extract attack type from file name
def extract_attack_type(file_name):
    """Return the attack type encoded in a CSV file name.

    The attack type is the text that follows the first "Back_" marker, up to
    (but not including) the next ".csv".

    Args:
        file_name: File name, e.g. "<prefix>_Back_<type>.csv".

    Returns:
        The extracted attack type, or "Others" when the name contains no
        "Back_" marker or nothing follows the marker.
    """
    # partition() reports a missing marker as an empty separator. The earlier
    # find()-based slicing turned the -1 "not found" result into a bogus start
    # offset and returned an arbitrary fragment of the file name.
    _, marker, tail = file_name.partition("Back_")
    if not marker:
        return "Others"

    # Text before the extension; a name without ".csv" is used in full instead
    # of losing its last character.
    attack_type = tail.partition(".csv")[0]

    # "_of_Attack_Back" is kept as an explicit alias for "Others" so names that
    # were mapped that way before still are.
    if attack_type in ("", "_of_Attack_Back"):
        return "Others"

    return attack_type

# Map each frame name ('df1', 'df2', ...) to the CSV file it was read from,
# using the same 1-based numbering as the loading loop.
file_names = {
    f'df{position}': csv_name
    for position, csv_name in enumerate(csv_files, start=1)
}

# Function to combine DataFrames
def combine_dataframes(dataframes, file_names):
    """Stack all frames into one DataFrame labelled by attack type.

    Each frame gets an 'Attack/Normal' column holding the attack type that
    extract_attack_type() derives from the frame's file name. The input frames
    are not modified.

    Args:
        dataframes: Mapping of frame name to DataFrame.
        file_names: Mapping of frame name to the file name it was loaded from;
            must contain every key of `dataframes`.

    Returns:
        The row-wise concatenation of all labelled frames with a fresh index.
    """
    # assign() works on a copy, so the caller's frames keep their columns.
    labelled_frames = [
        frame.assign(**{'Attack/Normal': extract_attack_type(file_names[key])})
        for key, frame in dataframes.items()
    ]

    merged = pd.concat(labelled_frames, ignore_index=True)

    # Quick check: first rows and the number of rows per attack type.
    print(merged.head())
    print(merged['Attack/Normal'].value_counts())

    return merged

# Build the single labelled DataFrame: every loaded frame is tagged with the
# attack type taken from its file name, then all frames are stacked row-wise.
combined_df = combine_dataframes(dataframes, file_names)


--------------------------------------------------

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Multi-class logistic regression on combined_df, predicting the attack type
# stored in the 'Attack/Normal' column.

# Every column other than the label is used as a feature.
# NOTE(review): StandardScaler below needs numeric input — confirm that
# combined_df has no other text columns.
X = combined_df.drop('Attack/Normal', axis=1)
y = combined_df['Attack/Normal']

# Turn the string labels into integer codes; label_encoder.classes_ keeps the
# original names for the report below.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 70/30 split, stratified on the same encoded labels that are being split so
# that every class keeps its proportion in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded
)

# Fit the scaler on the training part only and reuse it for the test part.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# The multi_class argument is deprecated in recent scikit-learn releases and is
# scheduled for removal; with the lbfgs solver a multinomial model is already
# what gets fitted for three or more classes, so the argument is dropped.
# NOTE(review): with exactly two classes this fits the ordinary binary model
# rather than a two-class softmax — confirm the data has 3+ attack types.
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train_scaled, y_train)

# Predict on the held-out part.
y_pred = model.predict(X_test_scaled)

# Overall accuracy plus per-class precision/recall/F1.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Now, we will implement Decision Tree and SVM models for comparison

from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Decision tree on the same scaled split; fit() returns the estimator itself,
# so construction and training are chained. Seeded for repeatable results.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_dt = dt_model.predict(X_test_scaled)

print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print(
    "\nDecision Tree Classification Report:\n",
    classification_report(y_test, y_pred_dt, target_names=label_encoder.classes_),
)

# Linear-kernel SVM on the same scaled split.
svm_model = SVC(kernel='linear', random_state=42).fit(X_train_scaled, y_train)
y_pred_svm = svm_model.predict(X_test_scaled)

print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print(
    "\nSVM Classification Report:\n",
    classification_report(y_test, y_pred_svm, target_names=label_encoder.classes_),
)


--------------------------------------------------



--------------------------------------------------

