In [None]:
# prompt: You are asked to calculate the slope of a given time series. The data is: []

In [None]:
# Calculate the slope with the closed-form least-squares (simple linear regression) formula
import numpy as np

# Observed series: 100 samples, regressed below against their positional index.
y = np.array([
    120.2961212, 109.2080162, 98.9537533, 87.3951084, 78.5215728, 71.5487645,
    62.1327063, 59.215158, 47.8672713, 52.1641521, 21.27311897, 20.9583709,
    4.62114204, 0.50534558, -5.4009138, -5.7863442, -28.43758437, -24.2008175,
    -54.34674664, -39.1959767, -55.95967942, -73.8228554, -60.4839082,
    -76.2550529, -74.1188103, -105.3065618, -113.2859763, -107.1281457,
    -114.2962236, -116.9062917, -133.9619619, -144.1898013, -137.0047106,
    -151.1097519, -154.4189764, -185.8445145, -184.4541504, -193.7439903,
    -210.52955, -206.1784129, -193.2500736, -215.9068514, -230.2125345,
    -228.7670146, -250.2199457, -266.1070973, -262.9228339, -288.4576993,
    -269.12947, -283.5241858, -294.5584899, -307.9799177, -312.8651144,
    -344.1481041, -321.5574975, -329.4373664, -328.4464111, -350.2971704,
    -369.5624025, -387.273018, -382.7558586, -390.9705394, -389.1991072,
    -409.0580274, -413.6738399, -398.5579103, -412.2268511, -442.9758933,
    -439.7998292, -438.252361, -446.1181744, -454.7550086, -469.7885275,
    -481.0593032, -475.5254844, -496.8041008, -500.8903476, -515.2757517,
    -507.2368661, -531.9039554, -537.5425866, -549.0310138, -537.730355,
    -564.0519484, -563.6181999, -576.6469249, -561.3313691, -583.8259648,
    -594.4479037, -615.3990507, -615.299238, -629.5286155, -640.9007857,
    -678.2662762, -663.919456, -643.4235936, -679.0742379, -658.7583244,
    -676.3862704, -687.8975981
])
# The regressor is simply the sample position 0, 1, ..., len(y) - 1.
x = np.arange(y.size)

# Sums that the closed-form slope needs; trailing comments are the values
# recorded when the notebook was originally run.
n = x.size
sum_x = x.sum()          # 4950
sum_y = y.sum()          # -29010.23775504
sum_xy = (x * y).sum()   # -2109006.0367863197
sum_x2 = (x ** 2).sum()  # 328350

# Least-squares slope: (n*Sxy - Sx*Sy) / (n*Sxx - Sx^2)
numerator = n * sum_xy - sum_x * sum_y
denominator = n * sum_x2 - sum_x ** 2
b = numerator / denominator
b

-8.076798894831557

In [None]:
# DeepSeek was able to correctly compute the slope this time, taking 648 seconds vs. 148 seconds when computing the incorrect slope.
# Reason for DeepSeek’s previous mistake: insufficient compute power — sum_xy was estimated, and the result of sum_y was also inaccurate.
# Even when the code is correct, the generated output may still be wrong, so the user needs to run the code themselves (though GPT has the same issue).

In [None]:
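# Seed the RNG, load the machine-1 pressure telemetry, add a synthetic trend, noise and cyclic component, then fit the slope of the first 100 points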

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Seed NumPy's global RNG first so the noise drawn below is the same on every run.
np.random.seed(42)

# Parameters of the synthetic components layered onto the raw signal.
growth_rate = 1.5        # linear trend added per time step
fluctuation_level = 1.8  # standard deviation of the Gaussian noise
cyclic_variation = 7     # amplitude of the sinusoidal component
cycle_duration = 336     # period of the sinusoid, in time steps

# Load the telemetry and put the rows in chronological order.
file_path = "/content/PdM_telemetry - machine1 pressure.csv"
dataset = pd.read_csv(file_path)
dataset['time_marker'] = pd.to_datetime(dataset['datetime'])
dataset = dataset.sort_values(by='time_marker')

# Positional time index (0, 1, 2, ...) and the pressure values in sorted order.
time_steps = np.arange(len(dataset))
sensor_readings = dataset['pressure'].values

# Build the three synthetic components, one value per row.
trend_effect = growth_rate * time_steps
random_variation = np.random.normal(loc=0, scale=fluctuation_level, size=len(dataset))
cyclic_effect = cyclic_variation * np.sin(2 * np.pi * time_steps / cycle_duration)

dataset['modified_sensor_readings'] = sensor_readings + trend_effect + random_variation + cyclic_effect

# Regress the first 100 modified readings on their time index.
X = time_steps[:100].reshape(-1, 1)  # single feature column for the regressor
y = dataset['modified_sensor_readings'][:100]

regressor = LinearRegression().fit(X, y)
slope = regressor.coef_[0]
y_pred = regressor.predict(X)

slope

1.608626997650629

In [None]:
# Series of 100 values whose slope is recomputed by hand below.
y = np.array([
    113.972021, 96.8425417, 79.66548259, 116.8825082, 117.9882828, 103.6591404,
    124.3820187, 113.7961487, 122.8226025, 120.4973816, 119.2478919, 113.572426,
    106.0056189, 111.2260341, 112.9426683, 109.1883457, 118.4875615, 119.5830766,
    124.4114252, 107.273729, 126.8954537, 126.2115186, 132.4562815, 128.2934688,
    152.3993581, 144.2275388, 127.1433646, 154.9712733, 135.1302049, 144.263522,
    153.6343992, 153.1753224, 153.5606369, 145.390646, 168.5879094, 157.0493412,
    185.7548808, 156.2630919, 145.4990369, 178.8320882, 146.7587357, 151.9250376,
    177.7076008, 183.0418563, 169.9980566, 163.786648, 169.3729645, 169.7193797,
    174.7429577, 180.3129633, 175.7040688, 166.7623875, 180.9558295, 185.098228,
    176.6577398, 206.1803802, 178.9754782, 176.8255279, 200.1317998, 201.0369923,
    188.551254, 185.213703, 194.5855167, 203.2992118, 211.2455342, 194.2414645,
    209.3108035, 226.1038455, 223.9243755, 222.7375063, 218.9648207, 219.8055892,
    212.469638, 238.0222394, 215.162087, 233.9814363, 215.6033942, 216.9381807,
    224.7386822, 213.5673761, 238.160055, 212.821627, 220.1293233, 212.9667076,
    243.2293706, 240.9306931, 240.8027919, 233.8294231, 227.2869887, 230.8675768,
    232.1003134, 240.8508533, 238.5507094, 248.8676498, 241.8583785, 247.2165277,
    263.1905395, 255.8322099, 261.9058438, 281.2687446
])
# The regressor is the sample position 0, 1, ..., len(y) - 1.
x = np.arange(y.size)

# Sums that the closed-form slope needs; trailing comments are the values
# recorded when the notebook was originally run.
n1 = x.size
sum_x1 = x.sum()           # 4950
sum_y1 = y.sum()           # 17839.01386059
sum_xy1 = (x * y).sum()    # 1017070.0306750799
sum_x2_1 = (x ** 2).sum()  # 328350

# Least-squares slope: (n*Sxy - Sx*Sy) / (n*Sxx - Sx^2)
numerator1 = n1 * sum_xy1 - sum_x1 * sum_y1
denominator1 = n1 * sum_x2_1 - sum_x1 ** 2
b1 = numerator1 / denominator1
b1

1.6086269976102605

In [None]:
# DeepSeek's result was 1.61, taking 398 seconds.

In [None]:
# Slope for the 101st through 200th data points (positions 100-199).
window = slice(100, 200)  # one slice object so X and y cover the same rows

X_subset = time_steps[window].reshape(-1, 1)
y_subset = dataset['modified_sensor_readings'][window]

# Fit a separate regressor so the first-100-point model is left untouched.
regressor1 = LinearRegression().fit(X_subset, y_subset)
slope1 = regressor1.coef_[0]
y_pred1 = regressor1.predict(X_subset)

slope1

1.3889445846806845

In [None]:
# Cross-check the fitted slope for points 100-199 with the closed-form
# least-squares formula; trailing comments are the values recorded when the
# notebook was originally run.
n_subset = len(X_subset)
x_subset_flat = X_subset.ravel()  # 1-D values of the (n, 1) feature column

sum_x_subset = np.sum(X_subset)                  # 14950
sum_y_subset = np.sum(y_subset)                  # 32722.291251864503
sum_xy_subset = np.sum(x_subset_flat * y_subset) # 5007716.349672262
sum_x2_subset = np.sum(np.square(X_subset))      # 2318350

# Least-squares slope: (n*Sxy - Sx*Sy) / (n*Sxx - Sx^2)
numerator_subset = n_subset * sum_xy_subset - sum_x_subset * sum_y_subset
denominator_subset = n_subset * sum_x2_subset - sum_x_subset ** 2
slope_subset = numerator_subset / denominator_subset
slope_subset

1.388944584680691

In [None]:
# DeepSeek's result was 1.39, taking 415 seconds.
# It’s worth noting that DeepSeek seems to carry over results from the previous prompt, so it’s best to start a new conversation for each individual task.