In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from google.colab import drive
import os

# Mount Google Drive at /content/drive so the Excel workbooks stored under
# MyDrive can be read through ordinary file paths
drive.mount('/content/drive')

def process_category(category_files, category_name):
    """Fit a linear humidity-vs-time trend for one category and print a report.

    Every workbook in ``category_files`` is read with ``pd.read_excel`` (the
    first six rows are skipped), the readings are pooled and ordered by
    timestamp, and a scikit-learn ``LinearRegression`` of absorption percentage
    against elapsed hours is fitted. The slope, fit quality, a linear
    extrapolation to 100 % and summary statistics for temperature and humidity
    are printed.

    Args:
        category_files: Paths of the Excel workbooks belonging to the category.
            Each sheet must provide the columns ``Date``, ``Humidity (% RH)``
            and ``Temperature (°C)``.
        category_name: Label used in the printed report.

    Raises:
        ValueError: If ``category_files`` is empty, or if no row has both a
            timestamp and a humidity reading.
    """
    if not category_files:
        raise ValueError(f"No files supplied for category {category_name}")

    all_data = []
    for file in category_files:
        data = pd.read_excel(file, skiprows=6)
        data['Datetime'] = pd.to_datetime(data['Date'])
        all_data.append(data)

    combined_data = pd.concat(all_data, ignore_index=True)

    # LinearRegression rejects NaN, so rows lacking a timestamp or a humidity
    # reading (e.g. blank trailing rows in a sheet) are removed before fitting.
    rows_before = len(combined_data)
    combined_data = combined_data.dropna(subset=['Datetime', 'Humidity (% RH)'])
    dropped_rows = rows_before - len(combined_data)
    if combined_data.empty:
        raise ValueError(f"No usable rows found for category {category_name}")

    combined_data = combined_data.sort_values('Datetime')

    INITIAL_WEIGHT = 88  # grams (as specified)
    MAX_CAPACITY = INITIAL_WEIGHT * 0.3  # assuming 30% of initial weight as max capacity

    # MAX_CAPACITY cancels out across the next two lines, so
    # absorption_percentage is numerically the relative-humidity reading
    # (up to floating-point rounding).
    combined_data['estimated_absorption'] = combined_data['Humidity (% RH)'] / 100 * MAX_CAPACITY
    combined_data['absorption_percentage'] = (combined_data['estimated_absorption'] / MAX_CAPACITY) * 100

    # Hours since the earliest reading in the pooled data
    start_time = combined_data['Datetime'].min()
    combined_data['time_elapsed'] = (combined_data['Datetime'] - start_time).dt.total_seconds() / 3600

    X = combined_data[['time_elapsed']]
    y = combined_data['absorption_percentage']

    model = LinearRegression()
    model.fit(X, y)

    absorption_rate = model.coef_[0]  # slope: percentage points per hour
    r_squared = model.score(X, y)
    y_pred = model.predict(X)
    mse = mean_squared_error(y, y_pred)

    print(f"\nResults for category {category_name}:")
    if dropped_rows:
        print(f"Dropped {dropped_rows} rows with a missing date or humidity value")
    print(f"Initial Weight: {INITIAL_WEIGHT}g")
    print(f"Absorption rate: {absorption_rate:.6f}% per hour")
    print(f"R-squared: {r_squared:.4f}")
    print(f"Mean Squared Error: {mse:.4f}")

    if absorption_rate > 0:
        # Extrapolate from the most recent reading (rows are time-ordered)
        # to 100 % at the fitted slope, then convert hours to days.
        current_absorption = y.iloc[-1]
        remaining_absorption = 100 - current_absorption
        estimated_days = (remaining_absorption / absorption_rate) / 24
        print(f"Estimated time to reach full capacity: {estimated_days:.2f} days")
    else:
        print("Absorption rate is not positive. Cannot estimate time to full capacity.")

    print(f"\nInitial absorption: {y.iloc[0]:.2f}%")
    print(f"Final absorption: {y.iloc[-1]:.2f}%")
    print(f"Data collection period: {(combined_data['Datetime'].max() - combined_data['Datetime'].min()).total_seconds() / 86400:.2f} days")
    print(f"Number of data points: {len(combined_data)}")

    print("\nTemperature Statistics:")
    print(combined_data['Temperature (°C)'].describe())

    print("\nHumidity Statistics:")
    print(combined_data['Humidity (% RH)'].describe())

# Folder on the mounted Drive that holds the raw Excel logs
folder_path = '/content/drive/MyDrive/Silica_GEL_Project/Raw data/Number of holes in bags'

# Full path of every .xlsx workbook found directly in that folder
excel_files = [
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if f.endswith('.xlsx')
]

# Bucket the workbooks by the hole-count tag found in their lower-cased path.
# Tags are tried in dictionary order and the first match wins; a file that
# matches no tag is left out.
categories = {'15holes': [], '30holes': [], '45holes': [], '60holes': [], 'NOHOLES': []}
for file in excel_files:
    file_lower = file.lower()
    for tag, bucket in categories.items():
        if tag.lower() in file_lower:
            bucket.append(file)
            break

# Analyse every category that has at least one workbook
for category, files in categories.items():
    if not files:
        print(f"No files found for category {category}")
        continue
    process_category(files, category)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Results for category 15holes:
Initial Weight: 88g
Absorption rate: 0.002078% per hour
R-squared: 0.5511
Mean Squared Error: 10.5026
Estimated time to reach full capacity: 1684.40 days

Initial absorption: 42.50%
Final absorption: 16.00%
Data collection period: 252.02 days
Number of data points: 354323

Temperature Statistics:
count    354323.000000
mean         20.267131
std           0.290381
min          20.040000
25%          20.160000
50%          20.220000
75%          20.280000
max          25.400000
Name: Temperature (°C), dtype: float64

Humidity Statistics:
count    354323.000000
mean          9.543209
std           4.836891
min           2.100000
25%           5.600000
50%           8.900000
75%          12.700000
max          81.700000
Name: Humidity (% RH), dtype: float64

Results for category 30holes:
Initial Weight: 88g
Absorption rate: 0.00287

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive for this Colab session
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from google.colab import drive
import os
import pickle

# Mount Google Drive at /content/drive so the Excel workbooks stored under
# MyDrive can be read through ordinary file paths
drive.mount('/content/drive')

def process_category(category_files, category_name):
    """Fit a linear humidity-vs-time trend for one category and return its parameters.

    Every workbook in ``category_files`` is read with ``pd.read_excel`` (the
    first six rows are skipped), the readings are pooled and ordered by
    timestamp, and a scikit-learn ``LinearRegression`` of absorption percentage
    against elapsed hours is fitted. A short report is printed as a side effect.

    Args:
        category_files: Paths of the Excel workbooks belonging to the category.
            Each sheet must provide the columns ``Date`` and ``Humidity (% RH)``.
        category_name: Label used in the printed report.

    Returns:
        dict with the keys ``absorption_rate`` (fitted slope, percentage points
        per hour), ``intercept`` (fitted intercept), ``mean_humidity`` (mean of
        the humidity column) and ``estimated_days`` (linear extrapolation to
        100 %, or ``float('inf')`` when the slope is not positive).

    Raises:
        ValueError: If ``category_files`` is empty, or if no row has both a
            timestamp and a humidity reading.
    """
    if not category_files:
        raise ValueError(f"No files supplied for category {category_name}")

    all_data = []
    for file in category_files:
        data = pd.read_excel(file, skiprows=6)
        data['Datetime'] = pd.to_datetime(data['Date'])
        all_data.append(data)

    combined_data = pd.concat(all_data, ignore_index=True)

    # LinearRegression rejects NaN, so rows lacking a timestamp or a humidity
    # reading (e.g. blank trailing rows in a sheet) are removed before fitting.
    rows_before = len(combined_data)
    combined_data = combined_data.dropna(subset=['Datetime', 'Humidity (% RH)'])
    dropped_rows = rows_before - len(combined_data)
    if combined_data.empty:
        raise ValueError(f"No usable rows found for category {category_name}")

    combined_data = combined_data.sort_values('Datetime')

    INITIAL_WEIGHT = 88  # grams (as specified)
    MAX_CAPACITY = INITIAL_WEIGHT * 0.3  # assuming 30% of initial weight as max capacity

    # MAX_CAPACITY cancels out across the next two lines, so
    # absorption_percentage is numerically the relative-humidity reading
    # (up to floating-point rounding).
    combined_data['estimated_absorption'] = combined_data['Humidity (% RH)'] / 100 * MAX_CAPACITY
    combined_data['absorption_percentage'] = (combined_data['estimated_absorption'] / MAX_CAPACITY) * 100

    # Hours since the earliest reading in the pooled data
    start_time = combined_data['Datetime'].min()
    combined_data['time_elapsed'] = (combined_data['Datetime'] - start_time).dt.total_seconds() / 3600

    X = combined_data[['time_elapsed']]
    y = combined_data['absorption_percentage']

    model = LinearRegression()
    model.fit(X, y)

    absorption_rate = model.coef_[0]  # slope: percentage points per hour
    intercept = model.intercept_
    r_squared = model.score(X, y)
    y_pred = model.predict(X)
    mse = mean_squared_error(y, y_pred)

    print(f"\nResults for category {category_name}:")
    if dropped_rows:
        print(f"Dropped {dropped_rows} rows with a missing date or humidity value")
    print(f"Initial Weight: {INITIAL_WEIGHT}g")
    print(f"Absorption rate: {absorption_rate:.6f}% per hour")
    print(f"Intercept: {intercept:.6f}")
    print(f"R-squared: {r_squared:.4f}")
    print(f"Mean Squared Error: {mse:.4f}")

    if absorption_rate > 0:
        # Extrapolate from the most recent reading (rows are time-ordered)
        # to 100 % at the fitted slope, then convert hours to days.
        current_absorption = y.iloc[-1]
        remaining_absorption = 100 - current_absorption
        estimated_days = (remaining_absorption / absorption_rate) / 24
        print(f"Estimated time to reach full capacity: {estimated_days:.2f} days")
    else:
        estimated_days = float('inf')  # Set to infinity if absorption rate is not positive
        print("Absorption rate is not positive. Cannot estimate time to full capacity.")

    print(f"\nInitial absorption: {y.iloc[0]:.2f}%")
    print(f"Final absorption: {y.iloc[-1]:.2f}%")
    print(f"Data collection period: {(combined_data['Datetime'].max() - combined_data['Datetime'].min()).total_seconds() / 86400:.2f} days")
    print(f"Number of data points: {len(combined_data)}")

    mean_humidity = combined_data['Humidity (% RH)'].mean()

    return {
        'absorption_rate': absorption_rate,
        'intercept': intercept,
        'mean_humidity': mean_humidity,
        'estimated_days': estimated_days
    }

# Folder on the mounted Drive that holds the raw Excel logs
folder_path = '/content/drive/MyDrive/Silica_GEL_Project/Raw data/Number of holes in bags'

# Full path of every .xlsx workbook found directly in that folder
excel_files = [
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if f.endswith('.xlsx')
]

# Bucket the workbooks by the hole-count tag found in their lower-cased path.
# Tags are tried in dictionary order and the first match wins; a file that
# matches no tag is left out.
categories = {'15holes': [], '30holes': [], '45holes': [], '60holes': [], 'NOHOLES': []}
for file in excel_files:
    file_lower = file.lower()
    for tag, bucket in categories.items():
        if tag.lower() in file_lower:
            bucket.append(file)
            break

# Analyse every category that has workbooks, keeping the returned fit
# parameters keyed by category name
results = {}
for category, files in categories.items():
    if not files:
        print(f"No files found for category {category}")
        continue
    results[category] = process_category(files, category)

# Persist the results so a later cell can reload them without refitting
with open('silica_gel_holes_results.pkl', 'wb') as file:
    pickle.dump(results, file)

print("\nAll results have been saved to 'silica_gel_holes_results.pkl'.")

# Echo what was saved; an infinite estimate is spelled out as "Infinity"
print("\nSaved Data:")
for category, data in results.items():
    print(f"{category}:")
    for key, value in data.items():
        is_unbounded = key == 'estimated_days' and value == float('inf')
        rendered = "Infinity" if is_unbounded else f"{value:.6f}"
        print(f"  {key}: {rendered}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Results for category 15holes:
Initial Weight: 88g
Absorption rate: 0.002078% per hour
Intercept: 3.373830
R-squared: 0.5511
Mean Squared Error: 10.5026
Estimated time to reach full capacity: 1684.40 days

Initial absorption: 42.50%
Final absorption: 16.00%
Data collection period: 252.02 days
Number of data points: 354323

Results for category 30holes:
Initial Weight: 88g
Absorption rate: 0.002878% per hour
Intercept: 1.178789
R-squared: 0.6623
Mean Squared Error: 13.0097
Estimated time to reach full capacity: 1233.31 days

Initial absorption: 37.30%
Final absorption: 14.80%
Data collection period: 252.04 days
Number of data points: 306704

Results for category 45holes:
Initial Weight: 88g
Absorption rate: 0.002803% per hour
Intercept: -0.390955
R-squared: 0.6512
Mean Squared Error: 12.6125
Estimated time to reach full capacity: 1265.19 days

Initial absorpti

In [3]:
import pickle

# Load the per-category results dictionary from the pickle file in the
# current working directory.
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# load a file that this notebook (or another trusted source) produced.
with open('silica_gel_holes_results.pkl', 'rb') as file:
    results = pickle.load(file)

def predict_duration(weight, numholes, brand, numbags, sizeholes, temperature):
    """Look up the stored fit results for a bag with ``numholes`` holes.

    The lookup reads the module-level ``results`` dictionary. Only ``numholes``
    selects the entry; ``weight``, ``brand``, ``numbags``, ``sizeholes`` and
    ``temperature`` are accepted but not used by this implementation.

    Args:
        numholes: Number of holes. ``0`` or the label ``'NOHOLES'`` (any
            letter case, surrounding whitespace ignored) selects the
            ``'NOHOLES'`` category; any other value ``n`` selects ``'<n>holes'``.

    Returns:
        A tuple ``(estimated_days, mean_humidity, absorption_rate, intercept)``
        when the category exists in ``results``; otherwise a string of the form
        ``"No data available for <numholes> holes"``.
    """
    # The label form must be recognised explicitly: formatting it like a number
    # would produce the non-existent key "NOHOLESholes".
    is_no_holes_label = isinstance(numholes, str) and numholes.strip().lower() == 'noholes'
    if numholes == 0 or is_no_holes_label:
        category = 'NOHOLES'
    else:
        category = f"{numholes}holes"

    if category not in results:
        return f"No data available for {numholes} holes"

    data = results[category]

    return data['estimated_days'], data['mean_humidity'], data['absorption_rate'], data['intercept']

# Predefined inputs: one scenario per bag configuration. The first entry is
# written out separately because it identifies the bag by label and passes
# sizeholes as a string.
inputs = [
    {"weight": 88, "numholes": 'NOHOLES', "brand": 1, "numbags": 2, "sizeholes": '1', "temperature": 20},
] + [
    {"weight": 88, "numholes": hole_count, "brand": 1, "numbags": 2, "sizeholes": 1, "temperature": 20}
    for hole_count in (15, 30, 45, 60)
]

# Make predictions for each input
print("\nPredictions:")
for input_data in inputs:
    result = predict_duration(**input_data)
    if isinstance(result, str):
        # A string result is the "no data" message; show it unchanged
        print(result)
        continue

    estimated_days, mean_humidity, absorption_rate, intercept = result
    report_lines = [
        f"\nPrediction for {input_data['numholes']} holes:",
        f"Input parameters: {input_data}",
        f"Absorption rate: {absorption_rate:.6f}% per hour",
        f"Intercept: {intercept:.6f}",
        f"Predicted days to full capacity: {estimated_days:.2f}",
        f"Mean humidity used: {mean_humidity:.2f}%",
    ]
    print("\n".join(report_lines))

# Print loaded data for verification
print("\nLoaded Data:")
for category, data in results.items():
    print(f"{category}:")
    for key, value in data.items():
        print(f"  {key}: {value:.6f}")


Predictions:
No data available for NOHOLES holes

Prediction for 15 holes:
Input parameters: {'weight': 88, 'numholes': 15, 'brand': 1, 'numbags': 2, 'sizeholes': 1, 'temperature': 20}
Absorption rate: 0.002078% per hour
Intercept: 3.373830
Predicted days to full capacity: 1684.40
Mean humidity used: 9.54%

Prediction for 30 holes:
Input parameters: {'weight': 88, 'numholes': 30, 'brand': 1, 'numbags': 2, 'sizeholes': 1, 'temperature': 20}
Absorption rate: 0.002878% per hour
Intercept: 1.178789
Predicted days to full capacity: 1233.31
Mean humidity used: 10.04%

Prediction for 45 holes:
Input parameters: {'weight': 88, 'numholes': 45, 'brand': 1, 'numbags': 2, 'sizeholes': 1, 'temperature': 20}
Absorption rate: 0.002803% per hour
Intercept: -0.390955
Predicted days to full capacity: 1265.19
Mean humidity used: 7.89%

Prediction for 60 holes:
Input parameters: {'weight': 88, 'numholes': 60, 'brand': 1, 'numbags': 2, 'sizeholes': 1, 'temperature': 20}
Absorption rate: 0.002842% per hour