In [None]:
# Colab-specific setup: mount Google Drive at /content/drive so that the
# dataset paths used by the cells below (under /content/drive/MyDrive) resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**DATA DESCRIPTION**

In [None]:
import pandas as pd

# Load the rollator gait dataset from the mounted Google Drive folder
df_rollator = pd.read_csv("/content/drive/MyDrive/new gait data/final_rollator_exampledata.csv")

# 1. Display basic information
print("Dataset Shape:", df_rollator.shape)
print("\nFirst 5 rows:")
print(df_rollator.head())
print("\nColumns:")
print(df_rollator.columns.tolist())
print("\nData Types:")
print(df_rollator.dtypes)

# 2. Check for missing values
print("\nMissing Values Summary:")
print(df_rollator.isnull().sum())

# 3. Generate descriptive statistics
print("\nDescriptive Statistics:")
print(df_rollator.describe(include='all'))

# 4. Check unique values for categorical columns
print("\nUnique Values Count:")
for column in df_rollator.columns:
    print(f"{column}: {df_rollator[column].nunique()} unique values")


# 5. Temporal data analysis (for time-like columns)
def _first_value_has_colon(series):
    """Return True when the first non-null entry of `series` contains ':'.

    Values such as '15:03.4' are treated as time-like. Inspecting the first
    *non-null* entry (rather than row 0) avoids an IndexError on an empty
    frame and still detects columns whose first row happens to be missing.
    """
    non_null = series.dropna()
    return (not non_null.empty) and ':' in str(non_null.iloc[0])


time_columns = [col for col in df_rollator.columns if _first_value_has_colon(df_rollator[col])]
if time_columns:
    print("\nTime-related Columns Summary:")
    print(df_rollator[time_columns].describe())

# 6. Correlation analysis (for numerical columns)
# 'number' selects every numeric dtype (e.g. int32/float32 as well), not only
# float64/int64, so no numeric column is silently left out of the matrix.
numerical_cols = df_rollator.select_dtypes(include='number').columns
if not numerical_cols.empty:
    print("\nCorrelation Matrix:")
    print(df_rollator[numerical_cols].corr())

Dataset Shape: (60, 48)

First 5 rows:
  Regular step WithSlopeup  Regular step WithStrideSlopeup  \
0                  15:03.4                            61.0   
1                  15:04.6                            59.0   
2                  15:05.9                            63.0   
3                  15:07.0                            60.0   
4                  15:08.2                            52.0   

   Regular step WithSpeedSlopeup Regular step WithSlopedown  \
0                          137.0                    31:51.5   
1                          122.0                    31:52.8   
2                          122.0                    31:54.0   
3                          118.0                    31:55.1   
4                          116.0                    31:56.2   

   Regular step WithStrideSlopedown  Regular step WithSpeedSlopdown  \
0                              50.0                           118.0   
1                              47.0                           127.0

In [None]:
import pandas as pd
import numpy as np

# Read the rollator example CSV from the mounted Drive folder into a DataFrame
df_rollator = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/new gait data/final_rollator_exampledata.csv"
)

# Section banner helper used by the report below
def print_header(text):
    """Print `text` upper-cased between two 60-character '=' rules.

    A blank line is emitted before the top rule and after the bottom rule.
    """
    rule = '=' * 60
    # One print call; `sep` supplies the line breaks between the three rows.
    print("\n" + rule, f"|| {text.upper()}", rule + "\n", sep="\n")

# 1. Dataset Overview
print_header("dataset overview")
n_records, n_features = df_rollator.shape
print(f"• Total Records: {n_records}")
print(f"• Total Features: {n_features}")
print("\nFirst 3 rows:")
first_rows = df_rollator.head(3)
print(first_rows.to_string())
print("\nData Types Distribution:")
dtype_distribution = df_rollator.dtypes.value_counts()
print(dtype_distribution.to_string())

# 2. Missing Value Analysis
# One row per column: absolute count of nulls and its share of all rows.
print_header("missing value analysis")
missing_df = df_rollator.isnull().sum().rename('Missing Count').to_frame()
missing_df['Missing %'] = missing_df['Missing Count'].div(len(df_rollator)).mul(100)
missing_ranked = missing_df.sort_values(by='Missing Count', ascending=False)
print(missing_ranked.to_string())

# 3. Cardinality Check
# Distinct-value count per column, shown next to the column's dtype.
print_header("cardinality analysis")
unique_counts = (
    df_rollator.nunique()
    .rename('Unique Values')
    .to_frame()
    .assign(**{'Data Type': df_rollator.dtypes})
)
unique_ranked = unique_counts.sort_values(by='Unique Values', ascending=False)
print(unique_ranked.to_string())



# 5. Statistical Analysis
# Summarise numeric and non-numeric columns separately. DataFrame.describe()
# raises ValueError on a frame with no columns, so each summary is skipped
# (with a placeholder line) when its column selection is empty.
print_header("statistical summary")
numerical_cols = df_rollator.select_dtypes(include=np.number).columns
categorical_cols = df_rollator.select_dtypes(exclude=np.number).columns

print("Numerical Features:")
if numerical_cols.empty:
    print("(none)")
else:
    print(df_rollator[numerical_cols].describe().transpose().to_string())

print("\nCategorical Features:")
if categorical_cols.empty:
    print("(none)")
else:
    print(df_rollator[categorical_cols].describe().transpose().to_string())

# 6. Correlation Insights
# List every distinct pair of numeric columns whose absolute Pearson
# correlation exceeds 0.7, strongest first.
print_header("correlation insights")
corr_matrix = df_rollator[numerical_cols].corr().abs()

# Keep only the strict upper triangle so each pair appears exactly once and
# the diagonal (a column with itself, always 1.0) is excluded. Deduplicating
# on the correlation *value* instead would keep a self-correlation and drop
# different pairs that happen to share the same |r|.
upper_triangle = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
pair_corrs = corr_matrix.where(upper_triangle).stack()

# The comparison is False for NaN, so masked or undefined entries are removed here.
strong_corrs = pair_corrs[pair_corrs > 0.7].sort_values(ascending=False)
print("Strong Correlations (|r| > 0.7):")
print(strong_corrs.to_string())

# 7. Data Quality Flags
# Each entry pairs a heading with the rows of a summary table that trip the
# corresponding threshold; they are printed in order, heading first.
print_header("data quality flags")
quality_flags = [
    (
        "• High Missing Values (>30%):",
        missing_df.loc[missing_df['Missing %'].gt(30)],
    ),
    (
        "\n• High Cardinality Features (>50 unique values):",
        unique_counts.loc[unique_counts['Unique Values'].gt(50)],
    ),
    (
        "\n• Potential Constant Features (<2 unique values):",
        unique_counts.loc[unique_counts['Unique Values'].lt(2)],
    ),
]
for flag_heading, flagged_rows in quality_flags:
    print(flag_heading)
    print(flagged_rows.to_string())

# 8. Suggested Actions
# NOTE(review): the column names, the 66.67% figure and the r = 0.98 values
# below are fixed text, not computed from the tables above - re-check them
# whenever the dataset changes.
print_header("suggested next steps")
next_steps = (
    "1. Time Column Standardization Needed",
    "2. Address High Missing Values in Acceleration WithAverageZSlopedown (66.67%)",
    "3. Investigate Strong Correlations Between:",
    "   - Regular step WithStrideSlopedown and Regular step Walking speedslopdown (r = 0.98)",
    "   - Regular step WithoutStrideSlopedown and Regular step withoutspeedslopedown (r = 0.98)",
    "4. Consider Feature Engineering for Temporal Measurements",
    "5. Evaluate High Cardinality Features for Dimensionality Reduction",
)
for step_line in next_steps:
    print(step_line)


|| DATASET OVERVIEW

• Total Records: 60
• Total Features: 48

First 3 rows:
  Regular step WithSlopeup  Regular step WithStrideSlopeup  Regular step WithSpeedSlopeup Regular step WithSlopedown  Regular step WithStrideSlopedown  Regular step WithSpeedSlopdown Regular step WithGravel  Regular step WithStrideGravel  Regular step WithSpeedGravel Regular step WithSidewalk  Regular step WithStrideSidwalk  Regular step WithSpeedSidewalk  Regular step Walking speedslopdown  Regular step Walking speed slopeup  Regular step Walking speed gravel  Regular step Walking speed sidewalk Acceleration WithSlopeup.1  Acceleration WithAverageZSlopeup Acceleration WithSlopedown.1  Acceleration WithAverageZSlopedown Acceleration WithGravel.1  Acceleration WithAverageZGravel Acceleration WithSidewalk.1  Acceleration WithAverageZSidewalk Regular step WithoutSlopeup  Regular step WithoutStrideSlopeup  Regular step WithoutSpeedSlopeup Regular step WithoutSlopedown  Regular step WithoutStrideSlopedown  Regular