In [38]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import BernoulliRBM
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('./datasets/None_data5.csv')

try:
    # Drop the 'Unnamed: 0' column
    data = data.drop(columns=['Unnamed: 0'])
except:
    pass

# Preprocess the categorical columns
categorical_columns = data.select_dtypes(include=['object']).columns
label_encoders = {}
for column in categorical_columns:
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    label_encoders[column] = label_encoder

# Convert the dataset to numpy array
data_array = data.values

# Split the dataset into train and test sets
train_data, test_data = train_test_split(data_array, test_size=0.2, random_state=42)

# Train the RBM
rbm = BernoulliRBM(n_components=10, n_iter=10, learning_rate=0.01, random_state=42)
rbm.fit(train_data)

# Extract hidden patterns from the RBM
hidden_patterns = rbm.transform(test_data)

def extract_rules(hidden_patterns, threshold=0.5, min_support=0.1, min_confidence=0.5):
    confident_rules = []
    num_features = hidden_patterns.shape[1]

    for feature_idx in range(num_features):
        column_idx = feature_idx // 10
        label_idx = feature_idx % 10
        column_name = data.columns[column_idx]
        
        if column_name not in label_encoders:
            continue
        
        label_encoder = label_encoders[column_name]
        thresholds = label_encoder.inverse_transform([label_idx])[0]

        feature_values = hidden_patterns[:, feature_idx]
        support = np.mean(feature_values > threshold)
        confidence = np.mean(feature_values[feature_values > threshold])

        if support >= min_support and confidence >= min_confidence:
            confident_rules.append((column_name, thresholds, support, confidence))

    return confident_rules

# Extract rules from hidden patterns
pattern_rules = extract_rules(hidden_patterns, threshold=0.5, min_support=0.1, min_confidence=0.5)

# Print the extracted rules with threshold, support, and confidence
for column_name, thresholds, support, confidence in pattern_rules:
    threshold_sentence = ', '.join([f"{column_name} {threshold}" for threshold in thresholds])
    threshold_percentage = support * 100

    sentence = f"{threshold_percentage:.2f}% of instances have {threshold_sentence} in the data."
    print(sentence)
    print(f"Confidence: {confidence:.2f}")
    print(f"Support: {support:.2f}")
    print()


100.00% of instances have FIN_REFERENCE H, FIN_REFERENCE L, FIN_REFERENCE S, FIN_REFERENCE A, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 2, FIN_REFERENCE 0, FIN_REFERENCE D, FIN_REFERENCE T, FIN_REFERENCE U in the data.
Confidence: 1.00
Support: 1.00

100.00% of instances have FIN_REFERENCE H, FIN_REFERENCE L, FIN_REFERENCE S, FIN_REFERENCE A, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 3, FIN_REFERENCE 7, FIN_REFERENCE 4 in the data.
Confidence: 1.00
Support: 1.00

100.00% of instances have FIN_REFERENCE H, FIN_REFERENCE L, FIN_REFERENCE S, FIN_REFERENCE A, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 3, FIN_REFERENCE B, FIN_REFERENCE C in the data.
Confidence: 1.00
Support: 1.00

100.00% of instances have FIN_REFERENCE H, FIN_REFERENCE L, FIN_REFERENCE S, FIN_REFERENCE A, FIN_REFERENCE 0, FIN_REFERENCE 0, FIN_REFERENCE 0,

In [40]:
import pandas as pd

# Import the dataset
data = pd.read_csv('datasets/avocado.csv')

# Print the first few rows of the dataset
print(data.head())

# Get some basic statistics about the dataset
print(data.describe())

# Perform data visualization or further analysis using pandas, NumPy, or scikit-learn
# ...

# Example: Calculate and print the average price of avocados
# average_price = data['AveragePrice'].mean()
# print(f"Average price of avocados: ${average_price:.2f}")

   Unnamed: 0        Date  AveragePrice  Total Volume     4046       4225   
0           0  2015-12-27          1.33      64236.62  1036.74   54454.85  \
1           1  2015-12-20          1.35      54876.98   674.28   44638.81   
2           2  2015-12-13          0.93     118220.22   794.70  109149.67   
3           3  2015-12-06          1.08      78992.15  1132.00   71976.41   
4           4  2015-11-29          1.28      51039.60   941.48   43838.39   

     4770  Total Bags  Small Bags  Large Bags  XLarge Bags          type   
0   48.16     8696.87     8603.62       93.25          0.0  conventional  \
1   58.33     9505.56     9408.07       97.49          0.0  conventional   
2  130.50     8145.35     8042.21      103.14          0.0  conventional   
3   72.58     5811.16     5677.40      133.76          0.0  conventional   
4   75.78     6183.95     5986.26      197.69          0.0  conventional   

   year  region  
0  2015  Albany  
1  2015  Albany  
2  2015  Albany  
3  2015 

In [42]:
import pandas as pd
from sklearn.neural_network import BernoulliRBM
from sklearn.preprocessing import MinMaxScaler

# Load the CSV file
data = pd.read_csv("datasets/avocado.csv")  # Replace "data.csv" with your file path

# Preprocess the data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)

# Train the RBM
rbm = BernoulliRBM(n_components=64, n_iter=10, learning_rate=0.1, verbose=True)
rbm.fit(data_scaled)

# Generate representations
representations = rbm.transform(data_scaled)

# Print the representations
print(representations)

ValueError: could not convert string to float: '2015-12-27'