Bunu kullandık; devam edebilirsiniz:

https://chatgpt.com/share/67410a77-56bc-8001-8105-c46d14f84456

In [1]:
!pip install pandas



In [44]:
def create_room_based_frames(df, rowCountFrame):
    """
    Cut a preprocessed DataFrame into fixed-size frames, one room at a time.

    Rows are grouped by the 'Room' column and every group is sliced into
    consecutive windows of ``rowCountFrame`` rows. Only full windows are kept,
    so a shorter leftover at the end of a room's rows is discarded and a frame
    never mixes rows from two rooms.

    :param df: Input preprocessed DataFrame; must contain a 'Room' column.
    :param rowCountFrame: Number of rows each frame should contain.
    :return: List of frames (DataFrames), each with exactly rowCountFrame rows.
    """

    def full_windows(room_rows):
        # Yield the consecutive, complete windows of a single room's rows.
        for start in range(0, len(room_rows), rowCountFrame):
            window = room_rows.iloc[start:start + rowCountFrame]
            if len(window) == rowCountFrame:
                yield window

    frames = []
    for _, room_rows in df.groupby('Room'):
        frames.extend(full_windows(room_rows))

    print(f"Total frames created: {len(frames)}")
    return frames


def process_dataframe(df):
    """
    Prepare concatenated scan data for modelling and print summary information.

    ``DataFrame.drop`` returns a new object, so all changes are made on that
    new DataFrame and the caller's DataFrame is left untouched. The steps are:

    1. Drop the "ESSID", "Frequency", "Bit Rate" and "Timestamp" columns.
       Columns that are absent are skipped instead of raising ``KeyError``.
    2. Label-encode the remaining categorical columns "Room" and "Address"
       (values are converted to strings first). A warning is printed for each
       of these columns that is missing.
    3. Rescale "Signal Level", if present, with ``1 - (level * -0.01)``,
       which is ``1 + level / 100`` (for example -70 becomes roughly 0.3).

    ``LabelEncoder`` must be in scope when the function is called. The fitted
    encoders are not returned.

    :param df: Input DataFrame with concatenated data.
    :return: Processed DataFrame with label-encoded columns.
    """
    # Print the number of rows in the input DataFrame
    print(f"Number of rows in the input DataFrame: {len(df)}")

    # Columns that are not used as features
    columns_to_drop = ["ESSID", "Frequency", "Bit Rate", "Timestamp"]
    # errors="ignore" matches the tolerant handling of missing columns below
    df = df.drop(columns=columns_to_drop, errors="ignore")

    # Only columns that survive the drop can be encoded; listing dropped
    # columns here would print a "not found" warning for them on every call.
    columns_to_encode = ["Room", "Address"]

    # Number of distinct classes found per encoded column
    unique_values = {}

    # Label encode each specified column and count unique values
    for column in columns_to_encode:
        if column in df.columns:
            le = LabelEncoder()
            df[column] = le.fit_transform(df[column].astype(str))  # Convert to string for encoding
            unique_values[column] = len(le.classes_)  # Count unique classes
        else:
            print(f"Warning: Column '{column}' not found in the DataFrame.")

    if "Signal Level" in df.columns:
        df["Signal Level"] = 1 - (df["Signal Level"] * -0.01)

    # Print the number of unique values in each column
    for column, count in unique_values.items():
        print(f"Column '{column}' has {count} unique values.")

    return df




In [45]:
from sklearn.preprocessing import LabelEncoder
import os
import pandas as pd

# Directory containing the CSV files
directory_path = '.'  # Update with the path to your directory

# CSVs produced by this pipeline live in the same directory as the raw scans.
# They must be skipped, otherwise a re-run would feed the already processed
# 'dataset.csv' back in as if it were a scan file.
output_csv = 'dataset.csv'
generated_csvs = {"concatenated_dataframe.csv", output_csv}

# Read every raw scan file. os.listdir() returns names in arbitrary order, so
# sort them to make the row order (and everything derived from it) reproducible.
loaded_frames = []
for file in sorted(os.listdir(directory_path)):
    if file.endswith('.csv') and file not in generated_csvs:
        file_path = os.path.join(directory_path, file)
        print(f"Processing file: {file_path}")

        # Read the CSV into a DataFrame
        df = pd.read_csv(file_path)
        loaded_frames.append(df)

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously loaded rows on every iteration.
if loaded_frames:
    final_dataframe = pd.concat(loaded_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty DataFrame
    final_dataframe = pd.DataFrame()

# Print summary of the final DataFrame
print(f"Final DataFrame shape: {final_dataframe.shape}")

# Preprocess the combined data and save the result as CSV
processed_df = process_dataframe(final_dataframe)
processed_df.to_csv(output_csv, index=False)


rowCountFrame = 10  # Set the desired frame size
frames = create_room_based_frames(processed_df, rowCountFrame)


Processing file: ./wifi_scan_L035_1732312703.csv
Processing file: ./wifi_scan_bathroom_1732311595.csv
Processing file: ./wifi_scan_L056_1732304554.csv
Processing file: ./wifi_scan_L030_1732309218.csv
Processing file: ./wifi_scan_L048_1732307002.csv
Processing file: ./wifi_scan_L027_1732312244.csv
Processing file: ./wifi_scan_L047_1732307455.csv
Processing file: ./wifi_scan_L045_1732308641.csv
Processing file: ./wifi_scan_corridoor1_1732306010.csv
Processing file: ./wifi_scan_L029_1732310213.csv
Processing file: ./wifi_scan_L055_1732305415.csv
Processing file: ./wifi_scan_corridoor2_1732310902.csv
Final DataFrame shape: (20608, 7)
Number of rows in the input DataFrame: 20608
Column 'Room' has 12 unique values.
Column 'Address' has 95 unique values.
Total frames created: 2056


In [43]:
import random
from sklearn.model_selection import train_test_split

# Shuffle the frames in place with a fixed seed. An unseeded shuffle reorders
# the input differently on every run, which makes the splits below
# irreproducible even though train_test_split is given random_state=42.
random.Random(42).shuffle(frames)

# Split into training and testing sets (80% / 20% of all frames)
train_frames, test_frames = train_test_split(frames, test_size=0.2, random_state=42)

# Further split training frames into training and validation sets
# (80% / 20% of the remaining frames, i.e. about 64% / 16% of the total)
train_frames, val_frames = train_test_split(train_frames, test_size=0.2, random_state=42)

# Helper that turns a list of frames into one feature table and one label column
def extract_X_Y(frames):
    """
    Split every frame into features and labels and stack the results.

    For each frame the 'Room' column is taken as the label and all other
    columns as features. The per-frame pieces are then concatenated in the
    order given, with a fresh 0..n-1 index.

    :param frames: List of DataFrames (frames), each containing a 'Room' column.
    :return: X (features as a DataFrame), Y (labels as a Series).
    """
    # One pass over the frames: (features without 'Room', 'Room' labels)
    pairs = [(frame.drop(columns=["Room"]), frame["Room"]) for frame in frames]

    # Stack the pieces; ignore_index discards the original row labels
    X = pd.concat([features for features, _ in pairs], ignore_index=True)
    Y = pd.concat([labels for _, labels in pairs], ignore_index=True)

    return X, Y

# Build the feature/label pairs for the train, validation and test splits
(X_train, Y_train), (X_val, Y_val), (X_test, Y_test) = (
    extract_X_Y(split) for split in (train_frames, val_frames, test_frames)
)

# Report the shape of every split
print(f"Training set: X={X_train.shape}, Y={Y_train.shape}")
print(f"Validation set: X={X_val.shape}, Y={Y_val.shape}")
print(f"Testing set: X={X_test.shape}, Y={Y_test.shape}")


Training set: X=(13150, 2), Y=(13150,)
Validation set: X=(3290, 2), Y=(3290,)
Testing set: X=(4120, 2), Y=(4120,)
