In [3]:
import pandas as pd

# Read the data from the image
data = {
    'Age': [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
    'Income': [55000, 48000, 45000, 42000, 37000, 33000, 29000, 25000, 22000, 19000, 16000],
    'Student': [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0],
    'Credit rating': ['fair', 'good', 'excellent', 'fair', 'fair', 'fair', 'poor', 'poor', 'poor', 'fair', 'poor'],
    'Buys computer': ['yes', 'no', 'yes', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no']
}

# Convert the dictionary into a Pandas DataFrame
df = pd.DataFrame(data)

# Count the number of instances of each class
class_counts = df['Buys computer'].value_counts()

# Calculate the prior probability for each class
prior_probabilities = class_counts / class_counts.sum()

# Print the prior probabilities
print(prior_probabilities)


no     0.727273
yes    0.272727
Name: Buys computer, dtype: float64


In [6]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KernelDensity

# Read the data from the image
data = {
    'Age': [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
    'Income': [55000, 48000, 45000, 42000, 37000, 33000, 29000, 25000, 22000, 19000, 16000],
    'Student': [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0],
    'Credit rating': ['fair', 'good', 'excellent', 'fair', 'fair', 'fair', 'poor', 'poor', 'poor', 'fair', 'poor'],
    'Buys computer': ['yes', 'no', 'yes', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no']
}

# Convert the dictionary into a Pandas DataFrame
df = pd.DataFrame(data)

# Calculate class conditional densities for Age and Income features
features = ['Age', 'Income']
classes = df['Buys computer'].unique()

for feature in features:
    print(f"Class Conditional Densities for Feature: {feature}\n")
    for class_label in classes:
        # Filter data for the specific class label
        data_class = df[df['Buys computer'] == class_label][feature]
        
        # Reshape the data to fit the KDE estimator
        data_class = data_class.values.reshape(-1, 1)
        
        # Fit KDE estimator
        kde = KernelDensity(kernel='gaussian').fit(data_class)
        
        # Generate sample data for which to evaluate the estimated densities
        x = pd.DataFrame({feature: range(min(df[feature]), max(df[feature]))})
        x_values = x[feature].values.reshape(-1, 1)
        
        # Evaluate the estimated densities for the sample data
        log_density = kde.score_samples(x_values)
        density = pd.Series(np.exp(log_density), index=x.index)
        
        print(f"Class: {class_label}\nDensity:\n{density}\n")


Class Conditional Densities for Feature: Age

Class: yes
Density:
0    1.510224e-01
1    1.627911e-01
2    1.689747e-01
3    1.627911e-01
4    1.510224e-01
5    8.213469e-02
6    1.804160e-02
7    1.477778e-03
8    4.461210e-05
9    4.955762e-07
dtype: float64

Class: no
Density:
0    0.030801
1    0.056634
2    0.061064
3    0.063936
4    0.068366
5    0.094199
6    0.118234
7    0.124429
8    0.124412
9    0.117680
dtype: float64

Class Conditional Densities for Feature: Income

Class: yes
Density:
0        0.000000e+00
1        0.000000e+00
2        0.000000e+00
3        0.000000e+00
4        0.000000e+00
             ...     
38995    4.955732e-07
38996    4.461008e-05
38997    1.477283e-03
38998    1.799699e-02
38999    8.065691e-02
Length: 39000, dtype: float64

Class: no
Density:
0        0.049868
1        0.030246
2        0.006749
3        0.000554
4        0.000017
           ...   
38995    0.000000
38996    0.000000
38997    0.000000
38998    0.000000
38999    0.000000
Leng

In [7]:
import pandas as pd
from scipy.stats import chi2_contingency
from scipy.stats import spearmanr, pearsonr

# Read the data from the image
data = {
    'Age': [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
    'Income': [55000, 48000, 45000, 42000, 37000, 33000, 29000, 25000, 22000, 19000, 16000],
    'Student': [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0],
    'Credit rating': ['fair', 'good', 'excellent', 'fair', 'fair', 'fair', 'poor', 'poor', 'poor', 'fair', 'poor'],
    'Buys computer': ['yes', 'no', 'yes', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no']
}

# Convert the dictionary into a Pandas DataFrame
df = pd.DataFrame(data)

# Calculate chi-square test for independence between 'Student' and 'Credit rating'
cross_tab = pd.crosstab(df['Student'], df['Credit rating'])
chi2, p, _, _ = chi2_contingency(cross_tab)
print(f"Chi-square test result for 'Student' and 'Credit rating':")
print(f"Chi-square value: {chi2}, p-value: {p}\n")

# Calculate correlation for 'Age' and 'Income'
pearson_corr, pearson_p = pearsonr(df['Age'], df['Income'])
spearman_corr, spearman_p = spearmanr(df['Age'], df['Income'])
print(f"Pearson correlation between 'Age' and 'Income': {pearson_corr}, p-value: {pearson_p}")
print(f"Spearman correlation between 'Age' and 'Income': {spearman_corr}, p-value: {spearman_p}")


Chi-square test result for 'Student' and 'Credit rating':
Chi-square value: 2.5732142857142857, p-value: 0.4622047909810518

Pearson correlation between 'Age' and 'Income': -0.9965476878818288, p-value: 4.861402895225907e-11
Spearman correlation between 'Age' and 'Income': -1.0, p-value: 0.0


In [13]:
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Data
data = {
    'Age': [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
    'Income': [55000, 48000, 45000, 42000, 37000, 33000, 29000, 25000, 22000, 19000, 16000],
    'Student': [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0],
    'Credit rating': ['fair', 'good', 'excellent', 'fair', 'fair', 'fair', 'poor', 'poor', 'poor', 'fair', 'poor'],
    'Buys computer': ['yes', 'no', 'yes', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no']
}

# Create DataFrame
df = pd.DataFrame(data)

# Features and target variable
X = df.drop('Buys computer', axis=1)
y = df['Buys computer']

# Define columns and transformers for preprocessing
categorical_cols = ['Credit rating']
categorical_transformer = OneHotEncoder(sparse=False)  # Ensure sparse=False for dense output

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_cols)
    ]
)

# Create pipeline with preprocessing and model
pipeline = make_pipeline(preprocessor, GaussianNB())

# Perform cross-validation and get accuracy scores
cv_scores = cross_val_score(pipeline, X, y, cv=5, scoring='accuracy')

# Fit the model on the entire dataset
pipeline.fit(X, y)

# Get classification report
predictions = pipeline.predict(X)
classification_rep = classification_report(y, predictions)

# Print results
print("Cross-Validation Accuracy Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())
print("\nClassification Report:\n", classification_rep)


Cross-Validation Accuracy Scores: [nan nan 1.  1.  0.5]
Mean CV Accuracy: nan

Classification Report:
               precision    recall  f1-score   support

          no       1.00      0.62      0.77         8
         yes       0.50      1.00      0.67         3

    accuracy                           0.73        11
   macro avg       0.75      0.81      0.72        11
weighted avg       0.86      0.73      0.74        11



Traceback (most recent call last):
  File "C:\Users\akshi\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 136, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "C:\Users\akshi\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 353, in _score
    y_pred = method_caller(estimator, "predict", X)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\akshi\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 86, in _cached_call
    result, _ = _get_response_values(
                ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\akshi\anaconda3\Lib\site-packages\sklearn\utils\_response.py", line 85, in _get_response_values
    y_pred = prediction_method(X)
             ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\akshi\anaconda3\Lib\site-packages\sklearn\pipeline.py", line 507, in predict
    Xt = transform.transform(Xt)
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\akshi\anaconda3\Lib\site-packages\sklearn\utils\_set_output.py", l

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Load your dataset
data = pd.read_excel('training (2).xlsx')

# Explore the dataset to understand its structure, columns, etc.
# ... (inspect data, check columns, data types, missing values, etc.)

# Preprocess the data, handle missing values, encode categorical variables, etc.
# Assuming 'Classification' is the target variable and you have prepared your features and target as X and y

# Split the data into training and testing sets
X = data.drop('Classification', axis=1)  # Features
y = data['Classification']  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize Gaussian Naive Bayes model
nb_model = GaussianNB()

# Train the model
nb_model.fit(X_train, y_train)

# Make predictions
predictions = nb_model.predict(X_test)

# Evaluate the model
classification_rep = classification_report(y_test, predictions)
print("Classification Report:\n", classification_rep)


ValueError: could not convert string to float: 'x= - b ± √b2- 4ac/2a\t\na=4\t\nb=3\t\nc=4\t\n= - 3 ±√9 - 4(4)(5)/2(4)\t\n= - 3 ±√9 - 80/8\t\n= - 3 ±√- 71/8\t\n'

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Load your dataset
data = pd.read_excel('training (2).xlsx')

# Clean the dataset by removing rows with non-numeric or irrelevant data in 'Classification' column
data_cleaned = data.dropna(subset=['Classification'])  # Drop rows with NaN values
data_cleaned = data_cleaned[data_cleaned['Classification'].apply(lambda x: str(x).isdigit())]  # Keep only numeric values

# Convert 'Classification' column to numeric (if needed)
data_cleaned['Classification'] = data_cleaned['Classification'].astype(int)

# Define features and target variable
X = data_cleaned.drop('Classification', axis=1)  # Features
y = data_cleaned['Classification']  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize Gaussian Naive Bayes model
nb_model = GaussianNB()

# Train the model
nb_model.fit(X_train, y_train)

# Make predictions
predictions = nb_model.predict(X_test)

# Evaluate the model
classification_rep = classification_report(y_test, predictions)
print("Classification Report:\n", classification_rep)


ValueError: could not convert string to float: 'x= - b ± √b2- 4ac/2a\t\na=4\t\nb=3\t\nc=4\t\n= - 3 ±√9 - 4(4)(5)/2(4)\t\n= - 3 ±√9 - 80/8\t\n= - 3 ±√- 71/8\t\n'