In [5]:
!pip3 install -U ucimlrepo 
!pip3 install pandas
!pip3 install seaborn



In [6]:
from ucimlrepo import fetch_ucirepo, list_available_datasets
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import seaborn as sns

In [7]:
nursery = fetch_ucirepo(id=76) 

In [8]:
X = nursery.data.features 
y = nursery.data.targets 

In [9]:
print(nursery.metadata) 

{'uci_id': 76, 'name': 'Nursery', 'repository_url': 'https://archive.ics.uci.edu/dataset/76/nursery', 'data_url': 'https://archive.ics.uci.edu/static/public/76/data.csv', 'abstract': ' Nursery Database was derived from a hierarchical decision model originally developed to rank applications for nursery schools.', 'area': 'Social Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 12960, 'num_features': 8, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1989, 'last_updated': 'Sun Jan 14 2024', 'dataset_doi': '10.24432/C5P88W', 'creators': ['Vladislav Rajkovic'], 'intro_paper': {'title': 'An application for admission in public school systems', 'authors': 'M. Olave, V. Rajkovic, M. Bohanec', 'published_in': 'Expert Systems in Public Administration', 'year': 1989, 'url': 'https://www.academia.edu/16670755/An_applica

In [10]:
print(nursery.variables) 

       name     role         type demographic  \
0   parents  Feature  Categorical        None   
1  has_nurs  Feature  Categorical        None   
2      form  Feature  Categorical        None   
3  children  Feature  Categorical        None   
4   housing  Feature  Categorical        None   
5   finance  Feature  Categorical        None   
6    social  Feature  Categorical        None   
7    health  Feature  Categorical        None   
8     class   Target  Categorical        None   

                                         description units missing_values  
0                     usual, pretentious, great_pret  None             no  
1  proper, less_proper, improper, critical, very_...  None             no  
2            complete, completed, incomplete, foster  None             no  
3                                      1, 2, 3, more  None             no  
4                    convenient, less_conv, critical  None             no  
5                                 convenient, inconv 

In [11]:
nursery_df = pd.DataFrame(data=X, columns=nursery.data.feature_names)
nursery_df['target'] = y

In [12]:
nursery_df.head()

Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,target
0,usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend
1,usual,proper,complete,1,convenient,convenient,nonprob,priority,priority
2,usual,proper,complete,1,convenient,convenient,nonprob,not_recom,not_recom
3,usual,proper,complete,1,convenient,convenient,slightly_prob,recommended,recommend
4,usual,proper,complete,1,convenient,convenient,slightly_prob,priority,priority


In [13]:
nursery_df['target'].value_counts()

target
not_recom     4320
priority      4266
spec_prior    4044
very_recom     328
recommend        2
Name: count, dtype: int64

In [14]:
nursery_df['parents'].value_counts()

parents
usual          4320
pretentious    4320
great_pret     4320
Name: count, dtype: int64

In [15]:
nursery_df['has_nurs'].value_counts()

has_nurs
proper         2592
less_proper    2592
improper       2592
critical       2592
very_crit      2592
Name: count, dtype: int64

In [16]:
nursery_df['form'].value_counts()

form
complete      3240
completed     3240
incomplete    3240
foster        3240
Name: count, dtype: int64

In [17]:
nursery_df['children'].value_counts()

children
1       3240
2       3240
3       3240
more    3240
Name: count, dtype: int64

In [18]:
nursery_df['housing'].value_counts()

housing
convenient    4320
less_conv     4320
critical      4320
Name: count, dtype: int64

In [19]:
nursery_df['finance'].value_counts()

finance
convenient    6480
inconv        6480
Name: count, dtype: int64

In [20]:
nursery_df['social'].value_counts()

social
nonprob          4320
slightly_prob    4320
problematic      4320
Name: count, dtype: int64

In [21]:
nursery_df['health'].value_counts()

health
recommended    4320
priority       4320
not_recom      4320
Name: count, dtype: int64

In [22]:
target_mapping = {'recommend': 'recommended', 'priority': 'recommended'}
nursery_df['target'] = nursery_df['target'].replace(target_mapping)


In [24]:
# Check for missing values
print("\nMissing values:")
print(nursery_df.isnull().sum())

# Check for categorical variables
print("\nCategorical variables:")
print(nursery_df.dtypes)

# Encode categorical variables
filtered_df = pd.get_dummies(nursery_df, columns=["parents", "has_nurs", "form", "children", "housing", "finance", "social", "health"])

# Display the first few rows of the cleaned dataset
print("\nCleaned dataset:")
print(filtered_df.head())

# Save the cleaned dataset to a new CSV file
filtered_df.to_csv("cleaned_nursery_data.csv", index=False)


Missing values:
parents     0
has_nurs    0
form        0
children    0
housing     0
finance     0
social      0
health      0
target      0
dtype: int64

Categorical variables:
parents     object
has_nurs    object
form        object
children    object
housing     object
finance     object
social      object
health      object
target      object
dtype: object

Cleaned dataset:
        target  parents_great_pret  parents_pretentious  parents_usual  \
0  recommended               False                False           True   
1  recommended               False                False           True   
2    not_recom               False                False           True   
3  recommended               False                False           True   
4  recommended               False                False           True   

   has_nurs_critical  has_nurs_improper  has_nurs_less_proper  \
0              False              False                 False   
1              False              False   

In [25]:
filtered_df = pd.get_dummies(nursery_df, drop_first=True, dtype='int')
filtered_df

Unnamed: 0,parents_pretentious,parents_usual,has_nurs_improper,has_nurs_less_proper,has_nurs_proper,has_nurs_very_crit,form_completed,form_foster,form_incomplete,children_2,...,housing_critical,housing_less_conv,finance_inconv,social_problematic,social_slightly_prob,health_priority,health_recommended,target_recommended,target_spec_prior,target_very_recom
0,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0
1,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
2,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,1,1,0,0
4,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,1,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12955,0,0,0,0,0,1,0,1,0,0,...,1,0,1,0,1,1,0,0,1,0
12956,0,0,0,0,0,1,0,1,0,0,...,1,0,1,0,1,0,0,0,0,0
12957,0,0,0,0,0,1,0,1,0,0,...,1,0,1,1,0,0,1,0,1,0
12958,0,0,0,0,0,1,0,1,0,0,...,1,0,1,1,0,1,0,0,1,0


In [26]:
# Data preprocessing
le = LabelEncoder()
filtered_df = filtered_df.apply(le.fit_transform)
filtered_df

Unnamed: 0,parents_pretentious,parents_usual,has_nurs_improper,has_nurs_less_proper,has_nurs_proper,has_nurs_very_crit,form_completed,form_foster,form_incomplete,children_2,...,housing_critical,housing_less_conv,finance_inconv,social_problematic,social_slightly_prob,health_priority,health_recommended,target_recommended,target_spec_prior,target_very_recom
0,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0
1,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
2,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,1,1,0,0
4,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,1,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12955,0,0,0,0,0,1,0,1,0,0,...,1,0,1,0,1,1,0,0,1,0
12956,0,0,0,0,0,1,0,1,0,0,...,1,0,1,0,1,0,0,0,0,0
12957,0,0,0,0,0,1,0,1,0,0,...,1,0,1,1,0,0,1,0,1,0
12958,0,0,0,0,0,1,0,1,0,0,...,1,0,1,1,0,1,0,0,1,0


In [27]:
# Split data into features and target

X = filtered_df.drop('target_very_recom', axis=1)
y = filtered_df['target_spec_prior']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the SVC model with RBF kernel
model_rbf = SVC(kernel='rbf', random_state=42)
model_rbf.fit(X_train, y_train)

y_pred_rbf = model_rbf.predict(X_test)
accuracy_rbf = accuracy_score(y_test, y_pred_rbf)
print("RBF Kernel Accuracy:", accuracy_rbf)

print("RBF Kernel Classification Report:")
print(classification_report(y_test, y_pred_rbf))


RBF Kernel Accuracy: 1.0
RBF Kernel Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1807
           1       1.00      1.00      1.00       785

    accuracy                           1.00      2592
   macro avg       1.00      1.00      1.00      2592
weighted avg       1.00      1.00      1.00      2592



In [28]:
# Initialize and train the SVC model with Polynomial kernel
model_poly = SVC(kernel='poly', degree=3, random_state=42)  # Degree 3 polynomial kernel
model_poly.fit(X_train, y_train)

y_pred_poly = model_poly.predict(X_test)
accuracy_poly = accuracy_score(y_test, y_pred_poly)
print("Polynomial Kernel Accuracy:", accuracy_poly)

print("Polynomial Kernel Classification Report:")
print(classification_report(y_test, y_pred_poly))

Polynomial Kernel Accuracy: 1.0
Polynomial Kernel Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1807
           1       1.00      1.00      1.00       785

    accuracy                           1.00      2592
   macro avg       1.00      1.00      1.00      2592
weighted avg       1.00      1.00      1.00      2592



In [34]:
confusion_matrix = confusion_matrix(y_test, y_train)
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.4)
sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap='Blues', annot_kws={"size": 16})
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

NameError: name 'confusion_matrix' is not defined