# CSV Datatype Analysis

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the dataset from disk and report its dimensions
df = pd.read_csv('Mobile.csv')
n_rows, n_cols = df.shape
print(f"Dataset shape: {df.shape}")
print(f"Number of rows: {n_rows}")
print(f"Number of columns: {n_cols}")

In [None]:
# Preview the top of the table (5 rows is the pandas default for head)
df.head(n=5)

In [None]:
# Show the dtype pandas inferred for every column, under a heading
print("Column Datatypes:", df.dtypes, sep="\n")

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
# buf=None is the default and writes the report to standard output.
df.info(buf=None)

In [None]:
# Count missing values per column (isna is the same check as isnull)
df.isna().sum()

In [None]:
# How many columns share each dtype
print("\nDatatype Summary:")
dtype_counts = df.dtypes.value_counts()
print(dtype_counts)

In [None]:
# Convert unit-suffixed string columns (e.g. "1500 mAh") to numeric dtypes.
# Each entry maps a column name to (unit text to strip, target dtype).
unit_conversions = {
    # Integer conversions
    'Battery_power_mAh': (' mAh', int),
    'Ram_mb': (' mb', int),
    'Internal_memeory_gb': (' gb', int),
    'Primary_camera': (' pixels', int),
    'Front_camera': (' pixels', int),
    'Mobile_weight': (' g', int),
    'px_height': (' ppcm', int),
    'Pixel_width': (' ppcm', int),
    'Screen_height': (' cm', int),
    # Float conversions
    'Mobile_depth': (' cm', float),
}

for col, (unit_text, target_dtype) in unit_conversions.items():
    # Already-numeric columns have no .str accessor; skipping them lets this
    # cell be re-run without raising AttributeError.
    if pd.api.types.is_numeric_dtype(df[col]):
        continue
    # regex=False treats the unit text as a literal, so the result does not
    # depend on the pandas version's default for the regex parameter.
    df[col] = df[col].str.replace(unit_text, '', regex=False).astype(target_dtype)

print("Data type conversions completed!")
print("\nUpdated datatypes:")
print(df.dtypes)

In [None]:
# Convert binary categorical features from Yes/No to 1/0
binary_columns = ['Bluetooh', 'Dual_sim', '4G', '3G', 'touch_screen', 'wifi']

for col in binary_columns:
    if col not in df.columns:  # Tolerate columns that are absent from the data
        continue
    # A column that is already numeric has been converted before; mapping it
    # again would turn every 1/0 into NaN, so leave it alone on re-runs.
    if pd.api.types.is_numeric_dtype(df[col]):
        continue
    converted = df[col].map({'Yes': 1, 'No': 0})
    # Values other than Yes/No become NaN in the mapping; report them
    # instead of losing them silently.
    unexpected = df.loc[converted.isna() & df[col].notna(), col].unique()
    if len(unexpected) > 0:
        print(f"Warning: {col} has values other than Yes/No (set to NaN): {unexpected}")
    df[col] = converted

print("Binary categorical conversions completed!")
print("\nConverted columns:")
for col in binary_columns:
    if col in df.columns:
        print(f"{col}: {df[col].unique()}")

In [None]:
# Remove the Screen_height, Screen_weight, and 3G columns
columns_to_remove = ['Screen_height', 'Screen_weight', '3G']

# Drop only the columns that are still present so that re-running this cell
# (or a dataset lacking one of them) does not raise KeyError.
present_columns = [name for name in columns_to_remove if name in df.columns]
skipped_columns = [name for name in columns_to_remove if name not in df.columns]
df = df.drop(columns=present_columns)

if skipped_columns:
    print(f"Columns not found (skipped): {skipped_columns}")
print(f"Columns removed: {present_columns}")
print(f"New dataset shape: {df.shape}")
print(f"Remaining columns: {df.shape[1]}")