<a href="https://colab.research.google.com/github/econ105/AI/blob/main/sklearn/vs/arima.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
from sklearn.linear_model import LinearRegression

# Fit a straight-line trend (Open price vs. calendar date) to the full HSBC
# (0005.HK) history and extrapolate it over the next three business days.

# Fetch HSBC (0005.HK) historical data
ticker = yf.Ticker("0005.HK")
df = ticker.history(period="max")

# An empty frame would otherwise surface later as an opaque error inside
# the regression fit, so stop here with an explicit message.
if df.empty:
    raise ValueError("No price history was returned for 0005.HK")

# Reset index to get Date column
df = df.reset_index()

# Use the proleptic Gregorian ordinal of each date as the single predictor.
# sklearn expects a 2-D feature matrix, hence the reshape to (n_samples, 1).
X = df["Date"].map(pd.Timestamp.toordinal).values.reshape(-1, 1)
y = df["Open"].values

# Fit linear regression
model = LinearRegression()
model.fit(X, y)

# Get the last date in the dataset
last_date = df["Date"].max()

# Generate the next 3 business days (Mon-Fri). Stepping by business day keeps
# weekend dates out of the forecast; exchange holidays are still NOT skipped.
prediction_dates = pd.date_range(
    start=last_date + pd.offsets.BDay(1),
    periods=3,
    freq="B",
)

# Predict Open prices
prediction_ordinals = prediction_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
predicted_opens = model.predict(prediction_ordinals)

# Combine into DataFrame
prediction_df = pd.DataFrame({
    "Date": prediction_dates,
    "Predicted_Open": predicted_opens
})

print(prediction_df)


                       Date  Predicted_Open
0 2026-02-25 00:00:00+08:00       52.092029
1 2026-02-26 00:00:00+08:00       52.093371
2 2026-02-27 00:00:00+08:00       52.094714


In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA

# Fit an AR(1) model (ARIMA order (1, 0, 0)) to a short window of daily Open
# prices and forecast the next three business days.

# Define the historical data (dates are written day/month/year)
data = {
    "Date": [
        "9/1/2024", "10/1/2024", "11/1/2024", "12/1/2024", "15/1/2024", "16/1/2024",
        "17/1/2024", "18/1/2024", "19/1/2024", "22/1/2024", "23/1/2024", "24/1/2024",
        "25/1/2024", "26/1/2024"
    ],
    "Open": [
        86.800003, 86.5, 85.449997, 86.199997, 85.449997, 85.599998, 83.199997,
        82.25, 83.5, 82.5, 79.599998, 81, 82.949997, 82.800003
    ]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Convert the Date column to datetime
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)

# Set the Date column as the index (assignment instead of inplace=True)
df = df.set_index("Date")

# Convert the data type of the Open column to float
df["Open"] = df["Open"].astype(float)

# Fit the ARIMA model
model = ARIMA(df["Open"], order=(1, 0, 0))
model_fit = model.fit()

# One horizon value drives both the forecast length and the number of dates,
# so the two columns of the result can never disagree in length.
forecast_steps = 3

# Predict the open values for the next 3 days
forecast = model_fit.forecast(steps=forecast_steps)

# Label the forecasts with the business days following the last observation.
# (The previous `closed='right'` argument no longer exists in pandas 2.x and,
# combined with the `[1:]` slice, yielded only two dates.)
next_dates = pd.date_range(
    start=df.index[-1] + pd.offsets.BDay(1),
    periods=forecast_steps,
    freq="B",
)

# Use every forecast value, aligned by position. np.asarray drops whatever
# index the forecast carries so it cannot misalign against next_dates; the
# previous `forecast[0]` picked a single value and broadcast it to all rows.
prediction_data = pd.DataFrame({
    "Date": next_dates,
    "Open": np.asarray(forecast)
})

# Print the predicted open values
print(prediction_data)

        Date       Open
0 2024-01-28  83.098213
1 2024-01-29  83.098213


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  next_dates = pd.date_range(start=df.index[-1], periods=4, closed='right')[1:]


In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Regress Volume on Open over the first 80% of the sample, then estimate the
# Volume that corresponds to the most recent Open price.

# Define the historical data (dates are written day/month/year)
data = {
    "Date": [
        "9/1/2024", "10/1/2024", "11/1/2024", "12/1/2024", "15/1/2024", "16/1/2024",
        "17/1/2024", "18/1/2024", "19/1/2024", "22/1/2024", "23/1/2024", "24/1/2024",
        "25/1/2024", "26/1/2024"
    ],
    "Open": [
        86.800003, 86.5, 85.449997, 86.199997, 85.449997, 85.599998, 83.199997,
        82.25, 83.5, 82.5, 79.599998, 81, 82.949997, 82.800003
    ],
    "Volume": [
        828137, 1198206, 726415, 1743907, 0, 3421388, 2046396, 2432394,
        1384820, 2201636, 1429331, 2226788, 1429685, 1187765
    ]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Convert the Date column to datetime
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)

# Sort the DataFrame by date in ascending order
df = df.sort_values(by="Date", ascending=True)

# Reset the index after sorting
df = df.reset_index(drop=True)

# Split the data chronologically: first 80% of rows for training, the rest
# held out. (test_data is kept for later evaluation; it is not used below.)
train_size = int(len(df) * 0.8)
train_data = df[:train_size]
test_data = df[train_size:]

# Fit the linear regression model for Volume
model_volume = LinearRegression()
model_volume.fit(train_data[["Open"]], train_data["Volume"])

# Take the most recent observed Open as the regression input. This is the
# last row of the data, not a forecast of a future Open.
next_open = df["Open"].iloc[-1]

# Predict with a one-row frame that carries the same "Open" column name the
# model was fitted on, so sklearn's feature-name validation is satisfied.
next_volume = model_volume.predict(pd.DataFrame({"Open": [next_open]}))

# Print the predicted next Volume value
print("Predicted Volume:", next_volume[0])

Predicted Volume: 1739079.292033499




In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Same Volume-on-Open regression as a standalone cell, additionally echoing
# the Open value that is fed into the Volume prediction.

# Define the historical data (dates are written day/month/year)
data = {
    "Date": [
        "9/1/2024", "10/1/2024", "11/1/2024", "12/1/2024", "15/1/2024", "16/1/2024",
        "17/1/2024", "18/1/2024", "19/1/2024", "22/1/2024", "23/1/2024", "24/1/2024",
        "25/1/2024", "26/1/2024"
    ],
    "Open": [
        86.800003, 86.5, 85.449997, 86.199997, 85.449997, 85.599998, 83.199997,
        82.25, 83.5, 82.5, 79.599998, 81, 82.949997, 82.800003
    ],
    "Volume": [
        828137, 1198206, 726415, 1743907, 0, 3421388, 2046396, 2432394,
        1384820, 2201636, 1429331, 2226788, 1429685, 1187765
    ]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Convert the Date column to datetime
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)

# Sort the DataFrame by date in ascending order
df = df.sort_values(by="Date", ascending=True)

# Reset the index after sorting
df = df.reset_index(drop=True)

# Split the data chronologically: first 80% of rows for training, the rest
# held out. (test_data is kept for later evaluation; it is not used below.)
train_size = int(len(df) * 0.8)
train_data = df[:train_size]
test_data = df[train_size:]

# Fit the linear regression model for Volume
model_volume = LinearRegression()
model_volume.fit(train_data[["Open"]], train_data["Volume"])

# Use the most recent observed Open as the regression input.
# NOTE(review): no model produces this value; it is simply the last row's
# Open, even though it is printed below under the label "Predicted Open".
next_open = df["Open"].iloc[-1]

# Predict with a one-row frame that carries the same "Open" column name the
# model was fitted on, so sklearn's feature-name validation is satisfied.
next_volume = model_volume.predict(pd.DataFrame({"Open": [next_open]}))

# Print the Open input and the predicted Volume (labels kept unchanged so the
# cell's printed output stays the same).
print("Predicted Open:", next_open)
print("Predicted Volume:", next_volume[0])

Predicted Open: 82.800003
Predicted Volume: 1739079.292033499


