In [6]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from google.colab import drive
import os

# Mount Google Drive at /content/drive so files stored on Drive can be opened by path
drive.mount('/content/drive')

def process_category(category_files, category_name):
    """Fit a linear humidity trend for one box category and print a report.

    Every workbook in ``category_files`` is read with ``pd.read_excel``
    (skipping the first 6 rows), the readings are merged and ordered by
    timestamp, and a ``LinearRegression`` of absorption percentage against
    elapsed hours is fitted. The slope, fit quality, a projected time to
    reach 100 % and descriptive statistics are printed; nothing is returned.

    Args:
        category_files: Paths of the Excel workbooks belonging to the
            category. Each one must provide the columns 'Date',
            'Humidity (% RH)' and 'Temperature (°C)'.
        category_name: Label used in the printed report.

    Raises:
        ValueError: If ``category_files`` is empty, or if no row has both a
            timestamp and a humidity reading.
    """
    INITIAL_WEIGHT = 176  # grams (as specified)
    MAX_CAPACITY = INITIAL_WEIGHT * 0.3  # assuming 30% of initial weight as max capacity
    HUMIDITY_COL = 'Humidity (% RH)'
    TEMPERATURE_COL = 'Temperature (°C)'

    # Fail early with a clear message instead of pd.concat's generic error.
    if not category_files:
        raise ValueError(f"No files supplied for category {category_name}")

    all_data = []
    for file in category_files:
        data = pd.read_excel(file, skiprows=6)
        data['Datetime'] = pd.to_datetime(data['Date'])
        all_data.append(data)

    combined_data = pd.concat(all_data, ignore_index=True)

    # The regression cannot be fitted on missing values, so rows lacking a
    # timestamp or a humidity reading are discarded before anything else.
    combined_data = combined_data.dropna(subset=['Datetime', HUMIDITY_COL])
    if combined_data.empty:
        raise ValueError(f"No usable readings found for category {category_name}")

    combined_data = combined_data.sort_values('Datetime')

    # NOTE: MAX_CAPACITY cancels out between the next two lines, so
    # 'absorption_percentage' is numerically the relative-humidity reading.
    combined_data['estimated_absorption'] = combined_data[HUMIDITY_COL] / 100 * MAX_CAPACITY
    combined_data['absorption_percentage'] = (combined_data['estimated_absorption'] / MAX_CAPACITY) * 100

    # Hours elapsed since the earliest reading; the single regression feature.
    first_reading = combined_data['Datetime'].min()
    last_reading = combined_data['Datetime'].max()
    combined_data['time_elapsed'] = (combined_data['Datetime'] - first_reading).dt.total_seconds() / 3600

    X = combined_data[['time_elapsed']]
    y = combined_data['absorption_percentage']

    model = LinearRegression()
    model.fit(X, y)

    absorption_rate = model.coef_[0]  # slope, in percentage points per hour
    r_squared = model.score(X, y)
    y_pred = model.predict(X)
    mse = mean_squared_error(y, y_pred)

    print(f"\nResults for category {category_name}:")
    # Derived from the constant so the report cannot drift from the value used above.
    print(f"Initial Weight: {INITIAL_WEIGHT}g")
    print(f"Absorption rate: {absorption_rate:.6f}% per hour")
    print(f"R-squared: {r_squared:.4f}")
    print(f"Mean Squared Error: {mse:.4f}")

    if absorption_rate > 0:
        # Extrapolate from the most recent reading up to 100 % at the fitted rate.
        current_absorption = y.iloc[-1]
        remaining_absorption = 100 - current_absorption
        estimated_days = (remaining_absorption / absorption_rate) / 24
        print(f"Estimated time to reach full capacity: {estimated_days:.2f} days")
    else:
        print("Absorption rate is not positive. Cannot estimate time to full capacity.")

    print(f"\nInitial absorption: {y.iloc[0]:.2f}%")
    print(f"Final absorption: {y.iloc[-1]:.2f}%")
    print(f"Data collection period: {(last_reading - first_reading).total_seconds() / 86400:.2f} days")
    print(f"Number of data points: {len(combined_data)}")

    print("\nTemperature Statistics:")
    print(combined_data[TEMPERATURE_COL].describe())

    print("\nHumidity Statistics:")
    print(combined_data[HUMIDITY_COL].describe())

# Specify the folder path containing your Excel files
folder_path = '/content/drive/MyDrive/Silica_GEL_Project/Raw data/Boxes with silica gel'

# Get all Excel files in the folder, in a stable (sorted) order.
# Names starting with '~$' are the lock files Excel leaves next to an open
# workbook; they are not readable spreadsheets, so they are skipped.
excel_files = [
    os.path.join(folder_path, f)
    for f in sorted(os.listdir(folder_path))
    if f.lower().endswith('.xlsx') and not f.startswith('~$')
]

# Categorize files: one bucket per brand. The insertion order of this dict is
# also the matching priority used below (first brand found in the name wins).
categories = {
    'Lock and Lock': [],
    'Clip and close': [],
    'Stewart': [],
    'Sistema': []
}

for file in excel_files:
    # Match against the file name only, so directory names in the path
    # can never decide which brand a workbook belongs to.
    file_lower = os.path.basename(file).lower()
    for category in categories:
        if category.lower() in file_lower:
            categories[category].append(file)
            break
    else:
        # Make ignored workbooks visible instead of dropping them silently.
        print(f"Skipping uncategorized file: {file}")

# Process each category
for category, files in categories.items():
    if files:
        process_category(files, category)
    else:
        print(f"No files found for category {category}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Results for category Lock and Lock:
Initial Weight: 176g
Absorption rate: 0.002792% per hour
R-squared: 0.8921
Mean Squared Error: 3.1769
Estimated time to reach full capacity: 1235.46 days

Initial absorption: 41.70%
Final absorption: 17.20%
Data collection period: 264.89 days
Number of data points: 686586

Temperature Statistics:
count    686586.000000
mean         20.000553
std           0.003354
min          20.000000
25%          20.000000
50%          20.000000
75%          20.000000
max          20.030000
Name: Temperature (°C), dtype: float64

Humidity Statistics:
count    686586.000000
mean          9.963477
std           5.425849
min           0.200000
25%           5.200000
50%           9.800000
75%          14.500000
max          43.700000
Name: Humidity (% RH), dtype: float64

Results for category Clip and close:
Initial Weight: 176g
Absorption

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from google.colab import drive
import os
import pickle

# Mount Google Drive at /content/drive so files stored on Drive can be opened by path
drive.mount('/content/drive')

def process_category(category_files, category_name):
    """Fit a linear humidity trend for one box category and return a summary.

    Every workbook in ``category_files`` is read with ``pd.read_excel``
    (skipping the first 6 rows), the readings are merged and ordered by
    timestamp, and a ``LinearRegression`` of absorption percentage against
    elapsed hours is fitted. The fit is printed and summarised in a dict.

    Args:
        category_files: Paths of the Excel workbooks belonging to the
            category. Each one must provide the columns 'Date',
            'Humidity (% RH)' and 'Temperature (°C)'.
        category_name: Label used in the printed report.

    Returns:
        dict with plain ``float`` values under the keys 'absorption_rate'
        (percentage points per hour), 'intercept', 'mean_humidity',
        'mean_temperature', 'estimated_days' (``inf`` when the fitted rate is
        not positive), 'initial_absorption', 'final_absorption' and
        'data_collection_period' (days).

    Raises:
        ValueError: If ``category_files`` is empty, or if no row has both a
            timestamp and a humidity reading.
    """
    INITIAL_WEIGHT = 176  # grams (as specified)
    MAX_CAPACITY = INITIAL_WEIGHT * 0.3  # assuming 30% of initial weight as max capacity
    HUMIDITY_COL = 'Humidity (% RH)'
    TEMPERATURE_COL = 'Temperature (°C)'

    # Fail early with a clear message instead of pd.concat's generic error.
    if not category_files:
        raise ValueError(f"No files supplied for category {category_name}")

    all_data = []
    for file in category_files:
        data = pd.read_excel(file, skiprows=6)
        data['Datetime'] = pd.to_datetime(data['Date'])
        all_data.append(data)

    combined_data = pd.concat(all_data, ignore_index=True)

    # The regression cannot be fitted on missing values, so rows lacking a
    # timestamp or a humidity reading are discarded before anything else.
    combined_data = combined_data.dropna(subset=['Datetime', HUMIDITY_COL])
    if combined_data.empty:
        raise ValueError(f"No usable readings found for category {category_name}")

    combined_data = combined_data.sort_values('Datetime')

    # NOTE: MAX_CAPACITY cancels out between the next two lines, so
    # 'absorption_percentage' is numerically the relative-humidity reading.
    combined_data['estimated_absorption'] = combined_data[HUMIDITY_COL] / 100 * MAX_CAPACITY
    combined_data['absorption_percentage'] = (combined_data['estimated_absorption'] / MAX_CAPACITY) * 100

    # Hours elapsed since the earliest reading; the single regression feature.
    first_reading = combined_data['Datetime'].min()
    last_reading = combined_data['Datetime'].max()
    combined_data['time_elapsed'] = (combined_data['Datetime'] - first_reading).dt.total_seconds() / 3600

    X = combined_data[['time_elapsed']]
    y = combined_data['absorption_percentage']

    model = LinearRegression()
    model.fit(X, y)

    # Converted to built-in floats so the pickled summary holds no numpy objects.
    absorption_rate = float(model.coef_[0])
    intercept = float(model.intercept_)
    r_squared = model.score(X, y)
    y_pred = model.predict(X)
    mse = mean_squared_error(y, y_pred)

    print(f"\nResults for category {category_name}:")
    # Derived from the constant so the report cannot drift from the value used above.
    print(f"Initial Weight: {INITIAL_WEIGHT}g")
    print(f"Absorption rate: {absorption_rate:.6f}% per hour")
    print(f"Intercept: {intercept:.6f}")
    print(f"R-squared: {r_squared:.4f}")
    print(f"Mean Squared Error: {mse:.4f}")

    if absorption_rate > 0:
        # Extrapolate from the most recent reading up to 100 % at the fitted rate.
        current_absorption = y.iloc[-1]
        remaining_absorption = 100 - current_absorption
        estimated_days = float((remaining_absorption / absorption_rate) / 24)
        print(f"Estimated time to reach full capacity: {estimated_days:.2f} days")
    else:
        estimated_days = float('inf')
        print("Absorption rate is not positive. Cannot estimate time to full capacity.")

    mean_humidity = float(combined_data[HUMIDITY_COL].mean())
    mean_temperature = float(combined_data[TEMPERATURE_COL].mean())

    return {
        'absorption_rate': absorption_rate,
        'intercept': intercept,
        'mean_humidity': mean_humidity,
        'mean_temperature': mean_temperature,
        'estimated_days': estimated_days,
        'initial_absorption': float(y.iloc[0]),
        'final_absorption': float(y.iloc[-1]),
        'data_collection_period': (last_reading - first_reading).total_seconds() / 86400
    }

# Specify the folder path containing your Excel files
folder_path = '/content/drive/MyDrive/Silica_GEL_Project/Raw data/Boxes with silica gel'

# Get all Excel files in the folder, in a stable (sorted) order.
# Names starting with '~$' are the lock files Excel leaves next to an open
# workbook; they are not readable spreadsheets, so they are skipped.
excel_files = [
    os.path.join(folder_path, f)
    for f in sorted(os.listdir(folder_path))
    if f.lower().endswith('.xlsx') and not f.startswith('~$')
]

# Categorize files: one bucket per brand. The insertion order of this dict is
# also the matching priority used below (first brand found in the name wins).
categories = {
    'Lock and Lock': [],
    'Clip and close': [],
    'Stewart': [],
    'Sistema': []
}

for file in excel_files:
    # Match against the file name only, so directory names in the path
    # can never decide which brand a workbook belongs to.
    file_lower = os.path.basename(file).lower()
    for category in categories:
        if category.lower() in file_lower:
            categories[category].append(file)
            break
    else:
        # Make ignored workbooks visible instead of dropping them silently.
        print(f"Skipping uncategorized file: {file}")

# Process each category and store results
results = {}
for category, files in categories.items():
    if files:
        results[category] = process_category(files, category)
    else:
        print(f"No files found for category {category}")

# Save results to a pickle file (a distinct handle name avoids shadowing the
# 'file' path variable used in the loop above)
with open('silica_gel_boxes_results.pkl', 'wb') as pkl_file:
    pickle.dump(results, pkl_file)

print("\nAll results have been saved to 'silica_gel_boxes_results.pkl'.")

# Print saved data for verification
print("\nSaved Data:")
for category, data in results.items():
    print(f"{category}:")
    for key, value in data.items():
        if isinstance(value, float):
            print(f"  {key}: {value:.6f}")
        else:
            print(f"  {key}: {value}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Results for category Lock and Lock:
Initial Weight: 176g
Absorption rate: 0.002792% per hour
Intercept: 1.087287
R-squared: 0.8921
Mean Squared Error: 3.1769
Estimated time to reach full capacity: 1235.46 days

Results for category Clip and close:
Initial Weight: 176g
Absorption rate: 0.001723% per hour
Intercept: 0.054442
R-squared: 0.6075
Mean Squared Error: 1.7465
Estimated time to reach full capacity: 2241.41 days

Results for category Stewart:
Initial Weight: 176g
Absorption rate: 0.001048% per hour
Intercept: -0.450910
R-squared: 0.3683
Mean Squared Error: 1.7150
Estimated time to reach full capacity: 3951.10 days

Results for category Sistema:
Initial Weight: 176g
Absorption rate: 0.006393% per hour
Intercept: 13.886609
R-squared: 0.8967
Mean Squared Error: 4.2849
Estimated time to reach full capacity: 421.02 days

All results have been saved to 'sili

In [3]:
import pickle

# Load the results from the pickle file.
# The relative path is resolved against the current working directory, so the
# file must already exist there before this cell runs.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load a file that this notebook produced itself.
with open('silica_gel_boxes_results.pkl', 'rb') as file:
    results = pickle.load(file)

def predict_duration(weight, numholes, brand, numbags, sizeholes, temperature):
    """Return the stored regression summary for ``brand``.

    Only ``brand`` is consulted; the other arguments are accepted but are not
    used by the lookup.

    Returns:
        A tuple ``(estimated_days, mean_humidity, absorption_rate, intercept)``
        read from the module-level ``results`` mapping, or a message string
        when ``brand`` has no entry in it.
    """
    if brand in results:
        brand_stats = results[brand]

        # Pull the pre-calculated figures out of the stored summary
        days, rate, offset, humidity = (
            brand_stats[field]
            for field in ('estimated_days', 'absorption_rate', 'intercept', 'mean_humidity')
        )
        return days, humidity, rate, offset

    return f"No data available for {brand} brand"

# Predefined inputs: one scenario per brand, differing only in the hole count
inputs = [
    {"weight": 176, "numholes": hole_count, "brand": brand_name, "numbags": 2, "sizeholes": 0, "temperature": 20}
    for brand_name, hole_count in (
        ("Lock and Lock", 0),
        ("Clip and close", 15),
        ("Stewart", 30),
        ("Sistema", 45),
    )
]

# Make predictions for each input
print("\nPredictions:")
for input_data in inputs:
    result = predict_duration(**input_data)
    if isinstance(result, str):
        # A string result is the "no data" message; show it and move on
        print(result)
        continue

    estimated_days, mean_humidity, absorption_rate, intercept = result
    report_lines = [
        f"\nPrediction for {input_data['brand']}:",
        f"Input parameters: {input_data}",
        f"Absorption rate: {absorption_rate:.6f}% per hour",
        f"Intercept: {intercept:.6f}",
        f"Predicted days to full capacity: {estimated_days:.2f}",
        f"Mean humidity used: {mean_humidity:.2f}%",
    ]
    print("\n".join(report_lines))

# Print loaded data for verification
print("\nLoaded Data from Pickle File:")
for category, data in results.items():
    print(f"{category}:")
    for key, value in data.items():
        # Floats get six decimals; anything else uses its default formatting
        shown = f"{value:.6f}" if isinstance(value, float) else f"{value}"
        print(f"  {key}: {shown}")


Predictions:

Prediction for Lock and Lock:
Input parameters: {'weight': 176, 'numholes': 0, 'brand': 'Lock and Lock', 'numbags': 2, 'sizeholes': 0, 'temperature': 20}
Absorption rate: 0.002792% per hour
Intercept: 1.087287
Predicted days to full capacity: 1235.46
Mean humidity used: 9.96%

Prediction for Clip and close:
Input parameters: {'weight': 176, 'numholes': 15, 'brand': 'Clip and close', 'numbags': 2, 'sizeholes': 0, 'temperature': 20}
Absorption rate: 0.001723% per hour
Intercept: 0.054442
Predicted days to full capacity: 2241.41
Mean humidity used: 2.90%

Prediction for Stewart:
Input parameters: {'weight': 176, 'numholes': 30, 'brand': 'Stewart', 'numbags': 2, 'sizeholes': 0, 'temperature': 20}
Absorption rate: 0.001048% per hour
Intercept: -0.450910
Predicted days to full capacity: 3951.10
Mean humidity used: 1.28%

Prediction for Sistema:
Input parameters: {'weight': 176, 'numholes': 45, 'brand': 'Sistema', 'numbags': 2, 'sizeholes': 0, 'temperature': 20}
Absorption rate