In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# One seed shared by the train/test split and the classifier, so that a fresh
# "Restart Kernel -> Run All" reproduces the same partition and the same tree.
SEED = 42

# Load the dataset (path is resolved relative to the current working directory)
df = pd.read_csv('music.csv')

# Display the first few rows
print(df.head())

# Check for missing values: prints the number of nulls in each column
print(df.isnull().sum())

# Split the data into features and labels
X = df.drop(columns=['genre'])  # Features: all columns except 'genre'
y = df['genre']                 # Labels: the 'genre' column

# Split the data into training and testing sets (20% held out for evaluation)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Initialize the model.
# random_state is fixed because scikit-learn permutes the features at every
# split; without a seed, equally good candidate splits can be resolved
# differently from run to run, changing the tree and its predictions.
model = DecisionTreeClassifier(random_state=SEED)

# Train the model on the training portion only
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate accuracy: the fraction of held-out rows predicted correctly
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Example of new data: one row per person to classify, using the same
# column names as the feature frame the model was fitted on
new_data = pd.DataFrame({
    'age': [20, 25],
    'gender': [1, 0]
})

# Predict the genre for each row of new_data
predictions = model.predict(new_data)
print(predictions)


   age  gender   genre
0   20       1  HipHop
1   23       1  HipHop
2   25       1  HipHop
3   26       1    Jazz
4   29       1    Jazz
age       0
gender    0
genre     0
dtype: int64
Accuracy: 100.00%
['HipHop' 'Dance']
