In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from google.colab import drive
import os
import pickle

# Expose Google Drive under /content/drive so the Excel inputs below are readable
drive.mount(mountpoint='/content/drive')

def process_category(category_files, category_name):
    """Fit a linear humidity-vs-time trend for one hole-size category.

    Every workbook in ``category_files`` is read with ``pd.read_excel`` (the
    first 6 rows are skipped), the frames are concatenated and ordered by
    timestamp, and a ``LinearRegression`` of absorption percentage against
    elapsed hours is fitted. A summary is printed as a side effect.

    Args:
        category_files: Non-empty list of Excel file paths. Each sheet must
            provide a 'Date' column and a 'Humidity (% RH)' column.
        category_name: Label used in the printed report and error messages.

    Returns:
        dict with keys 'absorption_rate' (slope, % per hour), 'intercept',
        'r_squared', 'mse', 'estimated_days' (``inf`` when the slope is not
        positive), 'mean_humidity', 'initial_absorption', 'final_absorption'
        and 'data_collection_period' (days between first and last timestamp).

    Raises:
        ValueError: If ``category_files`` is empty or no row has both a
            timestamp and a humidity reading.
    """
    if not category_files:
        # pd.concat([]) would otherwise fail with an opaque
        # "No objects to concatenate" that does not name the category.
        raise ValueError(f"No files supplied for category {category_name}")

    frames = []
    for path in category_files:
        frame = pd.read_excel(path, skiprows=6)
        frame['Datetime'] = pd.to_datetime(frame['Date'])
        frames.append(frame)
    combined_data = pd.concat(frames, ignore_index=True)

    # Rows without a timestamp or a humidity value cannot be regressed on and
    # would make LinearRegression.fit raise; drop them and say so.
    usable = combined_data.dropna(subset=['Datetime', 'Humidity (% RH)'])
    dropped = len(combined_data) - len(usable)
    if dropped:
        print(f"Warning: dropped {dropped} rows with missing Datetime/humidity for category {category_name}")
    if usable.empty:
        raise ValueError(f"No usable rows for category {category_name}")
    combined_data = usable.sort_values('Datetime')

    INITIAL_WEIGHT = 88  # grams
    MAX_CAPACITY = INITIAL_WEIGHT * 0.3  # assuming 30% of initial weight as max capacity

    # NOTE(review): MAX_CAPACITY cancels between the next two lines, so
    # 'absorption_percentage' equals the raw RH reading (up to float rounding)
    # and the fit is effectively humidity vs. time. Kept as-is so the numbers
    # match earlier runs.
    combined_data['estimated_absorption'] = combined_data['Humidity (% RH)'] / 100 * MAX_CAPACITY
    combined_data['absorption_percentage'] = (combined_data['estimated_absorption'] / MAX_CAPACITY) * 100

    first_stamp = combined_data['Datetime'].min()
    last_stamp = combined_data['Datetime'].max()
    combined_data['time_elapsed'] = (combined_data['Datetime'] - first_stamp).dt.total_seconds() / 3600
    period_days = (last_stamp - first_stamp).total_seconds() / 86400

    X = combined_data[['time_elapsed']]
    y = combined_data['absorption_percentage']

    model = LinearRegression()
    model.fit(X, y)

    absorption_rate = model.coef_[0]
    intercept = model.intercept_
    r_squared = model.score(X, y)
    mse = mean_squared_error(y, model.predict(X))

    initial_absorption = y.iloc[0]
    final_absorption = y.iloc[-1]

    print(f"\nResults for category {category_name}:")
    print(f"Absorption rate: {absorption_rate:.6f}% per hour")
    print(f"Intercept: {intercept:.6f}")
    print(f"R-squared: {r_squared:.4f}")
    print(f"Mean Squared Error: {mse:.4f}")

    if absorption_rate > 0:
        # Linear extrapolation from the last observed reading up to 100%.
        remaining_absorption = 100 - final_absorption
        estimated_days = (remaining_absorption / absorption_rate) / 24
        print(f"Estimated time to reach full capacity: {estimated_days:.2f} days")
    else:
        estimated_days = float('inf')
        print("Absorption rate is not positive. Cannot estimate time to full capacity.")

    mean_humidity = combined_data['Humidity (% RH)'].mean()

    return {
        'absorption_rate': absorption_rate,
        'intercept': intercept,
        'r_squared': r_squared,
        'mse': mse,
        'estimated_days': estimated_days,
        'mean_humidity': mean_humidity,
        'initial_absorption': initial_absorption,
        'final_absorption': final_absorption,
        'data_collection_period': period_days
    }

# Specify the folder path containing your Excel files
folder_path = '/content/drive/MyDrive/Silica_GEL_Project/Raw data/Size of holes'

# Get all Excel files in the folder.
# - sorted(): os.listdir returns entries in arbitrary order.
# - lower(): accept '.XLSX' as well as '.xlsx'.
# - '~$' prefix: lock files Excel creates next to an open workbook; they are
#   not readable spreadsheets and would make pd.read_excel fail.
excel_files = [
    os.path.join(folder_path, f)
    for f in sorted(os.listdir(folder_path))
    if f.lower().endswith('.xlsx') and not f.startswith('~$')
]

# Categorize files. Match on the file name only, so that a directory name
# containing e.g. '1mm' or 'loose' cannot decide the category. Dict order gives
# the same precedence as before: loose, 1mm, 2mm, 3mm.
categories = {'loose': [], '1mm': [], '2mm': [], '3mm': []}
for excel_path in excel_files:
    file_name = os.path.basename(excel_path).lower()
    for category in categories:
        if category in file_name:
            categories[category].append(excel_path)
            break
    else:
        # Previously such files were dropped without any trace.
        print(f"Skipping uncategorized file: {excel_path}")

# Process each category and store results
results = {}
for category, files in categories.items():
    if files:
        results[category] = process_category(files, category)
    else:
        print(f"No files found for category {category}")

# Save results to a pickle file (relative path: lands in the kernel's working directory)
with open('silica_gel_holes_results.pkl', 'wb') as pickle_file:
    pickle.dump(results, pickle_file)

print("\nAll results have been saved to 'silica_gel_holes_results.pkl'.")

# Print saved data for verification
print("\nSaved Data:")
for category, data in results.items():
    print(f"\n{category}:")
    for key, value in data.items():
        if isinstance(value, float):
            print(f"  {key}: {value:.6f}")
        else:
            print(f"  {key}: {value}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Results for category loose:
Absorption rate: 0.002352% per hour
Intercept: -0.374176
R-squared: 0.5554
Mean Squared Error: 4.8982
Estimated time to reach full capacity: 1470.43 days

Results for category 1mm:
Absorption rate: 0.001556% per hour
Intercept: 4.224063
R-squared: 0.4383
Mean Squared Error: 3.4367
Estimated time to reach full capacity: 2426.18 days

Results for category 2mm:
Absorption rate: 0.001255% per hour
Intercept: 4.721910
R-squared: 0.3146
Mean Squared Error: 3.7954
Estimated time to reach full capacity: 2909.52 days

Results for category 3mm:
Absorption rate: 0.001555% per hour
Intercept: 2.561805
R-squared: 0.5938
Mean Squared Error: 1.8323
Estimated time to reach full capacity: 2384.34 days

All results have been saved to 'silica_gel_holes_results.pkl'.

Saved Data:

loose:
  absorption_rate: 0.002352
  intercept: -0.374176
  r_squared:

In [5]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from google.colab import drive
import os
import pickle

# Expose Google Drive under /content/drive so the Excel inputs below are readable
drive.mount(mountpoint='/content/drive')

def process_category(category_files, category_name):
    """Fit a linear humidity-vs-time trend for one hole-size category.

    Every workbook in ``category_files`` is read with ``pd.read_excel`` (the
    first 6 rows are skipped), the frames are concatenated and ordered by
    timestamp, and a ``LinearRegression`` of absorption percentage against
    elapsed hours is fitted. The full set of summary values is printed as a
    side effect and also returned.

    Args:
        category_files: Non-empty list of Excel file paths. Each sheet must
            provide a 'Date' column and a 'Humidity (% RH)' column.
        category_name: Label used in the printed report and error messages.

    Returns:
        dict with keys 'absorption_rate' (slope, % per hour), 'intercept',
        'r_squared', 'mse', 'estimated_days' (``inf`` when the slope is not
        positive), 'mean_humidity', 'initial_absorption', 'final_absorption'
        and 'data_collection_period' (days between first and last timestamp).

    Raises:
        ValueError: If ``category_files`` is empty or no row has both a
            timestamp and a humidity reading.
    """
    if not category_files:
        # pd.concat([]) would otherwise fail with an opaque
        # "No objects to concatenate" that does not name the category.
        raise ValueError(f"No files supplied for category {category_name}")

    frames = []
    for path in category_files:
        frame = pd.read_excel(path, skiprows=6)
        frame['Datetime'] = pd.to_datetime(frame['Date'])
        frames.append(frame)
    combined_data = pd.concat(frames, ignore_index=True)

    # Rows without a timestamp or a humidity value cannot be regressed on and
    # would make LinearRegression.fit raise; drop them and say so.
    usable = combined_data.dropna(subset=['Datetime', 'Humidity (% RH)'])
    dropped = len(combined_data) - len(usable)
    if dropped:
        print(f"Warning: dropped {dropped} rows with missing Datetime/humidity for category {category_name}")
    if usable.empty:
        raise ValueError(f"No usable rows for category {category_name}")
    combined_data = usable.sort_values('Datetime')

    INITIAL_WEIGHT = 88  # grams
    MAX_CAPACITY = INITIAL_WEIGHT * 0.3  # assuming 30% of initial weight as max capacity

    # NOTE(review): MAX_CAPACITY cancels between the next two lines, so
    # 'absorption_percentage' equals the raw RH reading (up to float rounding)
    # and the fit is effectively humidity vs. time. Kept as-is so the numbers
    # match earlier runs.
    combined_data['estimated_absorption'] = combined_data['Humidity (% RH)'] / 100 * MAX_CAPACITY
    combined_data['absorption_percentage'] = (combined_data['estimated_absorption'] / MAX_CAPACITY) * 100

    first_stamp = combined_data['Datetime'].min()
    last_stamp = combined_data['Datetime'].max()
    combined_data['time_elapsed'] = (combined_data['Datetime'] - first_stamp).dt.total_seconds() / 3600
    # Computed once here instead of being re-derived for both the report and the result.
    period_days = (last_stamp - first_stamp).total_seconds() / 86400

    X = combined_data[['time_elapsed']]
    y = combined_data['absorption_percentage']

    model = LinearRegression()
    model.fit(X, y)

    absorption_rate = model.coef_[0]
    intercept = model.intercept_
    r_squared = model.score(X, y)
    mse = mean_squared_error(y, model.predict(X))

    initial_absorption = y.iloc[0]
    final_absorption = y.iloc[-1]

    if absorption_rate > 0:
        # Linear extrapolation from the last observed reading up to 100%.
        remaining_absorption = 100 - final_absorption
        estimated_days = (remaining_absorption / absorption_rate) / 24
    else:
        estimated_days = float('inf')

    mean_humidity = combined_data['Humidity (% RH)'].mean()

    print(f"\nResults for category {category_name}:")
    print(f"Absorption rate: {absorption_rate:.6f}% per hour")
    print(f"Intercept: {intercept:.6f}")
    print(f"R-squared: {r_squared:.6f}")
    print(f"Mean Squared Error: {mse:.6f}")
    print(f"Estimated time to reach full capacity: {estimated_days:.6f} days")
    print(f"Mean humidity: {mean_humidity:.6f}%")
    print(f"Initial absorption: {initial_absorption:.6f}%")
    print(f"Final absorption: {final_absorption:.6f}%")
    print(f"Data collection period: {period_days:.6f} days")

    return {
        'absorption_rate': absorption_rate,
        'intercept': intercept,
        'r_squared': r_squared,
        'mse': mse,
        'estimated_days': estimated_days,
        'mean_humidity': mean_humidity,
        'initial_absorption': initial_absorption,
        'final_absorption': final_absorption,
        'data_collection_period': period_days
    }

# Specify the folder path containing your Excel files
folder_path = '/content/drive/MyDrive/Silica_GEL_Project/Raw data/Size of holes'

# Get all Excel files in the folder.
# - sorted(): os.listdir returns entries in arbitrary order.
# - lower(): accept '.XLSX' as well as '.xlsx'.
# - '~$' prefix: lock files Excel creates next to an open workbook; they are
#   not readable spreadsheets and would make pd.read_excel fail.
excel_files = [
    os.path.join(folder_path, f)
    for f in sorted(os.listdir(folder_path))
    if f.lower().endswith('.xlsx') and not f.startswith('~$')
]

# Categorize files. Match on the file name only, so that a directory name
# containing e.g. '1mm' or 'loose' cannot decide the category. Dict order gives
# the same precedence as before: loose, 1mm, 2mm, 3mm.
categories = {'loose': [], '1mm': [], '2mm': [], '3mm': []}
for excel_path in excel_files:
    file_name = os.path.basename(excel_path).lower()
    for category in categories:
        if category in file_name:
            categories[category].append(excel_path)
            break
    else:
        # Previously such files were dropped without any trace.
        print(f"Skipping uncategorized file: {excel_path}")

# Process each category and store results
results = {}
for category, files in categories.items():
    if files:
        results[category] = process_category(files, category)
    else:
        print(f"No files found for category {category}")

# Save results to a pickle file (relative path: lands in the kernel's working directory)
with open('silica_gel_holes_results.pkl', 'wb') as pickle_file:
    pickle.dump(results, pickle_file)

print("\nAll results have been saved to 'silica_gel_holes_results.pkl'.")

# Print saved data for verification
print("\nSaved Data:")
for category, data in results.items():
    print(f"\n{category}:")
    for key, value in data.items():
        if isinstance(value, float):
            print(f"  {key}: {value:.6f}")
        else:
            print(f"  {key}: {value}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Results for category loose:
Absorption rate: 0.002352% per hour
Intercept: -0.374176
R-squared: 0.555354
Mean Squared Error: 4.898160
Estimated time to reach full capacity: 1470.434691 days
Mean humidity: 3.909267%
Initial absorption: 47.300000%
Final absorption: 17.000000%
Data collection period: 151.824757 days

Results for category 1mm:
Absorption rate: 0.001556% per hour
Intercept: 4.224063
R-squared: 0.438350
Mean Squared Error: 3.436724
Estimated time to reach full capacity: 2426.179079 days
Mean humidity: 7.060623%
Initial absorption: 59.000000%
Final absorption: 9.400000%
Data collection period: 151.925289 days

Results for category 2mm:
Absorption rate: 0.001255% per hour
Intercept: 4.721910
R-squared: 0.314636
Mean Squared Error: 3.795381
Estimated time to reach full capacity: 2909.524862 days
Mean humidity: 7.007965%
Initial absorption: 50.800000%

In [6]:
import pickle

# Restore the per-category summary dict written by the analysis cell.
# pickle.load can execute arbitrary code, so only open a file this notebook produced.
with open('silica_gel_holes_results.pkl', 'rb') as results_file:
    results = pickle.load(results_file)

def predict_duration(weight, numholes, brand, numbags, sizeholes, temperature):
    """Look up the stored regression summary for a hole size.

    The lookup reads the module-level ``results`` dict. Only ``sizeholes``
    influences the outcome; ``weight``, ``numholes``, ``brand``, ``numbags``
    and ``temperature`` are accepted but not used by the current
    implementation.

    Args:
        weight, numholes, brand, numbags, temperature: Currently unused.
        sizeholes: Hole-size selector. 'loose' (any letter case), a bare
            number such as ``1`` or ``'1'``, or an already-suffixed label
            such as ``'1mm'`` are all accepted.

    Returns:
        Tuple ``(estimated_days, mean_humidity, absorption_rate, intercept)``
        when the category exists in ``results``; otherwise a message string
        of the form "No data available for <category> hole size".
    """
    label = str(sizeholes).strip()
    lowered = label.lower()
    if lowered == 'loose':
        category = 'loose'
    elif lowered.endswith('mm'):
        # Already carries the unit; appending another 'mm' would give e.g. '1mmmm'.
        category = lowered
    else:
        category = f"{label}mm"

    if category not in results:
        return f"No data available for {category} hole size"

    data = results[category]
    absorption_rate = data['absorption_rate']
    intercept = data['intercept']
    mean_humidity = data['mean_humidity']
    estimated_days = data['estimated_days']

    return estimated_days, mean_humidity, absorption_rate, intercept

# Predefined inputs
inputs = [
    {"weight": 88, "numholes": 30, "brand": 1, "numbags": 2, "sizeholes": 'loose', "temperature": 20},
    {"weight": 88, "numholes": 30, "brand": 1, "numbags": 2, "sizeholes": 1, "temperature": 20},
    {"weight": 88, "numholes": 30, "brand": 1, "numbags": 2, "sizeholes": 2, "temperature": 20},
    {"weight": 88, "numholes": 30, "brand": 1, "numbags": 2, "sizeholes": 3, "temperature": 20}
]

# Make predictions for each input
print("\nPredictions:")
for input_data in inputs:
    result = predict_duration(**input_data)
    if isinstance(result, str):
        # predict_duration reports a missing category by returning a message string.
        print(result)
        continue

    # The fourth element is the fitted regression intercept, not the first
    # measured reading, so it is labelled as such (it can even be negative).
    days_to_full_capacity, mean_humidity, absorption_rate, intercept = result
    print(f"\nPrediction for {input_data['sizeholes']} hole size:")
    print(f"Input parameters: {input_data}")
    print(f"Absorption rate: {absorption_rate:.6f}% per hour")
    print(f"Regression intercept: {intercept:.6f}%")
    print(f"Predicted days to full capacity: {days_to_full_capacity:.2f}")
    print(f"Mean humidity used: {mean_humidity:.2f}%")

# Print loaded data for verification
print("\nLoaded Data:")
for category, data in results.items():
    print(f"\n{category}:")
    for key, value in data.items():
        if isinstance(value, float):
            print(f"  {key}: {value:.6f}")
        else:
            print(f"  {key}: {value}")


Predictions:

Prediction for loose hole size:
Input parameters: {'weight': 88, 'numholes': 30, 'brand': 1, 'numbags': 2, 'sizeholes': 'loose', 'temperature': 20}
Absorption rate: 0.002352% per hour
Initial absorption: -0.374176%
Predicted days to full capacity: 1470.43
Mean humidity used: 3.91%

Prediction for 1 hole size:
Input parameters: {'weight': 88, 'numholes': 30, 'brand': 1, 'numbags': 2, 'sizeholes': 1, 'temperature': 20}
Absorption rate: 0.001556% per hour
Initial absorption: 4.224063%
Predicted days to full capacity: 2426.18
Mean humidity used: 7.06%

Prediction for 2 hole size:
Input parameters: {'weight': 88, 'numholes': 30, 'brand': 1, 'numbags': 2, 'sizeholes': 2, 'temperature': 20}
Absorption rate: 0.001255% per hour
Initial absorption: 4.721910%
Predicted days to full capacity: 2909.52
Mean humidity used: 7.01%

Prediction for 3 hole size:
Input parameters: {'weight': 88, 'numholes': 30, 'brand': 1, 'numbags': 2, 'sizeholes': 3, 'temperature': 20}
Absorption rate: 0.0