# Importing necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the CSV data into a DataFrame
data = pd.read_csv('path/to/genres_v2.csv')

# Step 1: Preview the leading rows and report the dtype of every column
first_few_rows = data.head()
data_types = data.dtypes
print("First few rows:", first_few_rows, sep="\n")
print("\nData types:", data_types, sep="\n")

# Step 2: Count the null entries in each column
missing_values = data.isna().sum()
print("\nMissing values:", missing_values, sep="\n")

# Step 3: Descriptive statistics as produced by DataFrame.describe()
summary_statistics = data.describe()
print("\nSummary statistics:", summary_statistics, sep="\n")

# Step 4: List the distinct values found in the 'genre' column
unique_genres = data['genre'].unique()
print("\nUnique genres:", unique_genres, sep="\n")

# Steps 5-6: Build the cleaned frame in one chain:
#   - drop the 'Unnamed: 0' and 'title' columns (removed by the original
#     author for having excessive missing data), then
#   - discard every row whose 'song_name' is missing.
data_cleaned = (
    data
    .drop(columns=['Unnamed: 0', 'title'])
    .dropna(subset=['song_name'])
)

# Step 7: Draw side-by-side box plots of 'loudness' and 'tempo' so that
# outliers can be spotted visually. Each entry pairs a column with its title.
plt.figure(figsize=(12, 6))
for panel_index, (feature, panel_title) in enumerate(
    [
        ('loudness', 'Box Plot of Loudness'),
        ('tempo', 'Box Plot of Tempo'),
    ],
    start=1,
):
    # One row, two columns; panel_index selects the left (1) or right (2) slot.
    plt.subplot(1, 2, panel_index)
    sns.boxplot(x=data_cleaned[feature])
    plt.title(panel_title)

plt.tight_layout()
plt.show()

# Step 8: Identify outliers in 'loudness' and 'tempo'
def get_outliers(column, frame=None, *, whisker=1.5):
    """Return the rows whose value in *column* lies outside the IQR fences.

    The fences are ``Q1 - whisker * IQR`` and ``Q3 + whisker * IQR``, where
    Q1 and Q3 are the 25th and 75th percentiles of *column*.

    Args:
        column: Series of values to test for outliers.
        frame: DataFrame to select the outlier rows from. It should share
            its index with *column*. When omitted, the module-level
            ``data_cleaned`` frame is used, which preserves the behaviour
            of existing ``get_outliers(column)`` calls.
        whisker: Multiplier applied to the IQR when computing the fences.

    Returns:
        The subset of *frame* whose *column* value is strictly below the
        lower fence or strictly above the upper fence.
    """
    if frame is None:
        # Backward-compatible default: filter the script's cleaned frame.
        frame = data_cleaned

    first_quartile = column.quantile(0.25)
    third_quartile = column.quantile(0.75)
    iqr = third_quartile - first_quartile
    lower_bound = first_quartile - whisker * iqr
    upper_bound = third_quartile + whisker * iqr

    is_outlier = (column < lower_bound) | (column > upper_bound)
    return frame[is_outlier]

# Collect the outlier rows for both features in a single unpacking assignment.
loudness_outliers, tempo_outliers = (
    get_outliers(data_cleaned[feature]) for feature in ('loudness', 'tempo')
)

# Step 9: Show the first few outlier rows found for each feature
print("\nSample of Loudness Outliers:", loudness_outliers.head(), sep="\n")

print("\nSample of Tempo Outliers:", tempo_outliers.head(), sep="\n")
