<a href="https://colab.research.google.com/github/arumajirou/-daily-test/blob/main/timesfm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
import timesfm
import gc


df = pd.read_table('http://vvslot.com/download.php?m=777&f=numbers' + str(num) + '.txt', encoding="shift-jis", sep=",", names=["part", "date", "week", "eto", "抽選数字"], parse_dates=[1], dtype='object')

# '抽選数字' カラムの文字列を桁ごとに分割して新しいカラムを作成
df[['digit1', 'digit2', 'digit3', 'digit4']] = df['抽選数字'].apply(lambda x: pd.Series(list(x)))
df[['digit1', 'digit2', 'digit3', 'digit4']] = df[['digit1', 'digit2', 'digit3', 'digit4']].astype(int)

# ガベージコレクションを実行してメモリを解放
gc.collect()

# TimesFMモデルのロード
tfm = timesfm.TimesFm(
    context_len=512,  # コンテキストの長さを適切に設定
    horizon_len=1,    # 予測の長さ
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend='cuda',  # 'cpu' or 'cuda'
)

# モデルのチェックポイントのロード
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

# 'digit1' のデータをチャンクに分けて使用
chunk_size = 512
predictions = []

for start in range(0, len(df), chunk_size):
    end = start + chunk_size
    context_chunk = df['digit1'].values[start:end].reshape(1, -1).astype(np.float32)

    if len(context_chunk[0]) < 512:
        # パディングを追加してコンテキストの長さを512にする
        context_chunk = np.pad(context_chunk, ((0, 0), (0, 512 - len(context_chunk[0]))), 'constant', constant_values=0)

    frequency_input = [0]  # 高頻度データとして扱う

    # 予測の実行
    point_forecast, experimental_quantile_forecast = tfm.forecast(
        inputs=context_chunk,
        freq=frequency_input,
    )

    predictions.extend(point_forecast[0])

    # ガベージコレクションを実行してメモリを解放
    del context_chunk
    gc.collect()

print("予測されたdigit1の次の値:", predictions)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Constructing model weights.




Constructed model weights in 4.17 seconds.
Restoring checkpoint from /root/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 4.35 seconds.
Jitting decoding.
Jitted decoding in 18.73 seconds.
予測されたdigit1の次の値: [4.1077833, 4.4425125, 4.1173863, 4.273387, 4.5404367, 4.8194537, 4.7339745, 4.488575, 4.3625503, 4.314113, 4.586942, 4.19058, 0.0]


In [8]:
import plotly.graph_objs as go
import plotly.express as px

# Assuming previous code has executed and you have `df` and `predictions`

# Adjust the length of predictions to match the length of the actual data
actual_digit1 = df['digit1'].astype(int).values[:len(predictions)]

# Create a new DataFrame to store actual and predicted values
result_df = pd.DataFrame({
    'Index': range(len(actual_digit1)),
    'Actual': actual_digit1,
    'Predicted': predictions
})

# Create the plot
fig = go.Figure()

# Add actual values to the plot
fig.add_trace(go.Scatter(
    x=result_df['Index'],
    y=result_df['Actual'],
    mode='lines',
    name='Actual Values'
))

# Add predicted values to the plot
fig.add_trace(go.Scatter(
    x=result_df['Index'],
    y=result_df['Predicted'],
    mode='lines',
    name='Predicted Values',
    line=dict(dash='dash')
))

# Update layout
fig.update_layout(
    title='Actual vs Predicted Values for digit1',
    xaxis_title='Index',
    yaxis_title='digit1 Values',
    legend_title='Legend',
    template='plotly_white'
)

# Show the plot
fig.show()


In [7]:
import pandas as pd
import numpy as np
import timesfm
import gc
import plotly.graph_objs as go

# numの定義（例えばnum=1など）
num = 4

# データの読み込み
df = pd.read_table('http://vvslot.com/download.php?m=777&f=numbers' + str(num) + '.txt', encoding="shift-jis", sep=",", names=["part", "date", "week", "eto", "抽選数字"], parse_dates=[1], dtype='object')

# '抽選数字' カラムの文字列を桁ごとに分割して新しいカラムを作成
df[['digit1', 'digit2', 'digit3', 'digit4']] = df['抽選数字'].apply(lambda x: pd.Series(list(x)))
df[['digit1', 'digit2', 'digit3', 'digit4']] = df[['digit1', 'digit2', 'digit3', 'digit4']].astype(int)

# 移動平均の計算（ウィンドウサイズは任意に設定、ここでは例えば5とする）
df['digit1_ma'] = df['digit1'].rolling(window=5).mean()

# ガベージコレクションを実行してメモリを解放
gc.collect()

# TimesFMモデルのロード
tfm = timesfm.TimesFm(
    context_len=512,  # コンテキストの長さを適切に設定
    horizon_len=1,    # 予測の長さ
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend='cuda',  # 'cpu' or 'cuda'
)

# モデルのチェックポイントのロード
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

# 'digit1_ma' のデータをチャンクに分けて使用
chunk_size = 512
predictions = []

# 移動平均のNaNを除去
ma_values = df['digit1_ma'].dropna().values

for start in range(0, len(ma_values), chunk_size):
    end = start + chunk_size
    context_chunk = ma_values[start:end].reshape(1, -1).astype(np.float32)

    if len(context_chunk[0]) < 512:
        # パディングを追加してコンテキストの長さを512にする
        context_chunk = np.pad(context_chunk, ((0, 0), (0, 512 - len(context_chunk[0]))), 'constant', constant_values=0)

    frequency_input = [0]  # 高頻度データとして扱う

    # 予測の実行
    point_forecast, experimental_quantile_forecast = tfm.forecast(
        inputs=context_chunk,
        freq=frequency_input,
    )

    predictions.extend(point_forecast[0])

    # ガベージコレクションを実行してメモリを解放
    del context_chunk
    gc.collect()

print("予測されたdigit1の次の値:", predictions)

# 実測値と予測値をプロット
# 実測値と予測値の長さを揃える
actual_ma_values = ma_values[:len(predictions)]

# 結果をDataFrameに格納
result_df = pd.DataFrame({
    'Index': range(len(actual_ma_values)),
    'Actual': actual_ma_values,
    'Predicted': predictions
})

# Plotlyでプロット
fig = go.Figure()

# 実測値を追加
fig.add_trace(go.Scatter(
    x=result_df['Index'],
    y=result_df['Actual'],
    mode='lines',
    name='Actual Values'
))

# 予測値を追加
fig.add_trace(go.Scatter(
    x=result_df['Index'],
    y=result_df['Predicted'],
    mode='lines',
    name='Predicted Values',
    line=dict(dash='dash')
))

# レイアウトを更新
fig.update_layout(
    title='Actual vs Predicted Moving Average Values for digit1',
    xaxis_title='Index',
    yaxis_title='digit1 Moving Average Values',
    legend_title='Legend',
    template='plotly_white'
)

# プロットを表示
fig.show()


Multiprocessing context has already been set.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Constructing model weights.




Constructed model weights in 5.05 seconds.
Restoring checkpoint from /root/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 0.77 seconds.
Jitting decoding.
Jitted decoding in 15.85 seconds.
予測されたdigit1の次の値: [3.0032272, 5.4502115, 4.675059, 3.9418623, 3.3823328, 4.489526, 3.8813527, 5.245159, 4.0363116, 3.5580382, 5.472446, 4.691943, 0.0]


In [19]:
import pandas as pd
import numpy as np
import timesfm
import gc
import plotly.graph_objs as go

# numの定義（例えばnum=1など）
num = 4

# データの読み込み
df = pd.read_table('http://vvslot.com/download.php?m=777&f=numbers' + str(num) + '.txt', encoding="shift-jis", sep=",", names=["part", "date", "week", "eto", "抽選数字"], parse_dates=[1], dtype='object')

# '抽選数字' カラムの文字列を桁ごとに分割して新しいカラムを作成
df[['digit1', 'digit2', 'digit3', 'digit4']] = df['抽選数字'].apply(lambda x: pd.Series(list(x)))
df[['digit1', 'digit2', 'digit3', 'digit4']] = df[['digit1', 'digit2', 'digit3', 'digit4']].astype(int)

# 移動平均の計算（ウィンドウサイズは任意に設定、ここでは例えば5とする）
df['digit1_ma'] = df['digit1'].rolling(window=5).mean()

# ガベージコレクションを実行してメモリを解放
gc.collect()

# TimesFMモデルのロード
tfm = timesfm.TimesFm(
    context_len=256,  # 調整例: コンテキストの長さを512から256に変更
    horizon_len=1,    # 予測の長さはそのまま
    input_patch_len=16,  # 調整例: 入力パッチの長さを32から16に変更
    output_patch_len=64,  # 調整例: 出力パッチの長さを128から64に変更
    num_layers=10,  # 調整例: レイヤー数を20から10に減らす
    model_dims=640,  # 調整例: モデルの次元を1280から640に減らす
    backend='cuda',  # 'cpu' or 'cuda'
)

# モデルのチェックポイントのロード
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

# 'digit1_ma' のデータをチャンクに分けて使用
chunk_size = 256
predictions = []

# 移動平均のNaNを除去
ma_values = df['digit1_ma'].dropna().values

for start in range(0, len(ma_values), chunk_size):
    end = start + chunk_size
    context_chunk = ma_values[start:end].reshape(1, -1).astype(np.float32)

    if len(context_chunk[0]) < 256:
        # パディングを追加してコンテキストの長さを512にする
        context_chunk = np.pad(context_chunk, ((0, 0), (0, 512 - len(context_chunk[0]))), 'constant', constant_values=0)

    frequency_input = [0]  # 高頻度データとして扱う

    # 予測の実行
    point_forecast, experimental_quantile_forecast = tfm.forecast(
        inputs=context_chunk,
        freq=frequency_input,
    )

    predictions.extend(point_forecast[0])

    # ガベージコレクションを実行してメモリを解放
    del context_chunk
    gc.collect()

print("予測されたdigit1の次の値:", predictions)

# 実測値と予測値をプロット
# 実測値と予測値の長さを揃える
actual_ma_values = ma_values[:len(predictions)]

# 結果をDataFrameに格納
result_df = pd.DataFrame({
    'Index': range(len(actual_ma_values)),
    'Actual': actual_ma_values,
    'Predicted': predictions
})

# Plotlyでプロット
fig = go.Figure()

# 実測値を追加
fig.add_trace(go.Scatter(
    x=result_df['Index'],
    y=result_df['Actual'],
    mode='lines',
    name='Actual Values'
))

# 予測値を追加
fig.add_trace(go.Scatter(
    x=result_df['Index'],
    y=result_df['Predicted'],
    mode='lines',
    name='Predicted Values',
    line=dict(dash='dash')
))

# レイアウトを更新
fig.update_layout(
    title='Actual vs Predicted Moving Average Values for digit1',
    xaxis_title='Index',
    yaxis_title='digit1 Moving Average Values',
    legend_title='Legend',
    template='plotly_white'
)

# プロットを表示
fig.show()


Multiprocessing context has already been set.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Constructing model weights.




Constructed model weights in 1.68 seconds.
Restoring checkpoint from /root/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 0.58 seconds.
Jitting decoding.


ScopeParamShapeError: Initializer expected to generate shape (64, 1280) but got shape (32, 640) instead for parameter "w" in "/input_ff_layer/hidden_layer/linear". (https://flax.readthedocs.io/en/latest/api_reference/flax.errors.html#flax.errors.ScopeParamShapeError)

In [16]:
fig.show()


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6507 entries, 0 to 6506
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   part       6507 non-null   object 
 1   date       6507 non-null   object 
 2   week       6507 non-null   object 
 3   eto        6507 non-null   object 
 4   抽選数字       6507 non-null   object 
 5   digit1     6507 non-null   int64  
 6   digit2     6507 non-null   int64  
 7   digit3     6507 non-null   int64  
 8   digit4     6507 non-null   int64  
 9   digit1_ma  6503 non-null   float64
dtypes: float64(1), int64(4), object(5)
memory usage: 508.5+ KB


In [20]:
import pandas as pd
import numpy as np
import timesfm
import gc

# データの読み込み
num = 4
df = pd.read_table('http://vvslot.com/download.php?m=777&f=numbers' + str(num) + '.txt', encoding="shift-jis", sep=",", names=["part", "date", "week", "eto", "抽選数字"], parse_dates=[1], dtype='object')

# '抽選数字' カラムの文字列を桁ごとに分割して新しいカラムを作成
df[['digit1', 'digit2', 'digit3', 'digit4']] = df['抽選数字'].apply(lambda x: pd.Series(list(x)))
df[['digit1', 'digit2', 'digit3', 'digit4']] = df[['digit1', 'digit2', 'digit3', 'digit4']].astype(int)

# 移動平均の計算（ウィンドウサイズは任意に設定、ここでは5とする）
df['digit1_ma'] = df['digit1'].rolling(window=5).mean()

# ガベージコレクションを実行してメモリを解放
gc.collect()

# 新しいパラメータでTimesFMモデルを定義
tfm = timesfm.TimesFm(
    context_len=256,
    horizon_len=1,
    input_patch_len=16,
    output_patch_len=64,
    num_layers=10,
    model_dims=640,
    backend='cuda',
)
# 移動平均のNaNを除去
ma_values = df['digit1_ma'].dropna().values

# トレーニングデータを準備
chunk_size = 256
train_data = []
train_labels = []

for start in range(0, len(ma_values) - chunk_size):
    end = start + chunk_size
    context_chunk = ma_values[start:end].reshape(1, -1).astype(np.float32)
    label_chunk = ma_values[end:end + 1]  # 次の値を予測するためのラベル

    if len(context_chunk[0]) == chunk_size:
        train_data.append(context_chunk)
        train_labels.append(label_chunk)

train_data = np.array(train_data).squeeze()
train_labels = np.array(train_labels).squeeze()
# TimesFMのトレーニング関数は仮定しているため、実際のトレーニングループは公式ドキュメントを参照してください
# ここでは、トレーニングデータを使ってトレーニングを行う例を示します

# トレーニングループ（仮定）
epochs = 10
for epoch in range(epochs):
    for i in range(len(train_data)):
        inputs = train_data[i].reshape(1, -1)
        targets = train_labels[i].reshape(1, -1)
        loss = tfm.train_on_batch(inputs, targets)  # 仮定の関数
        print(f'Epoch {epoch + 1}/{epochs}, Batch {i + 1}/{len(train_data)}, Loss: {loss}')

# ガベージコレクションを実行してメモリを解放
gc.collect()
# チェックポイントの保存
checkpoint_path = "/path/to/new/checkpoint"
tfm.save_checkpoint(checkpoint_path)


Multiprocessing context has already been set.


AttributeError: 'TimesFm' object has no attribute 'train_on_batch'

In [55]:
import pandas as pd
import numpy as np
import timesfm
import gc

# データの読み込み
num = 4
df = pd.read_table('http://vvslot.com/download.php?m=777&f=numbers' + str(num) + '.txt', encoding="shift-jis", sep=",", names=["part", "date", "week", "eto", "抽選数字"], parse_dates=[1], dtype='object')

# '抽選数字' カラムの文字列を桁ごとに分割して新しいカラムを作成
df[['digit1', 'digit2', 'digit3', 'digit4']] = df['抽選数字'].apply(lambda x: pd.Series(list(x)))
df[['digit1', 'digit2', 'digit3', 'digit4']] = df[['digit1', 'digit2', 'digit3', 'digit4']].astype(int)

# 移動平均の計算（ウィンドウサイズは任意に設定、ここでは5とする）
df['digit1_ma'] = df['digit1'].rolling(window=5).mean()

# ガベージコレクションを実行してメモリを解放
gc.collect()
context_len=512
# TimesFMモデルのロード
tfm = timesfm.TimesFm(
    context_len=context_len,
    horizon_len=1,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend='cuda',
)

# モデルのチェックポイントのロード
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

# 移動平均のNaNを除去
ma_values = df['digit1_ma'].dropna().values

# 予測を実行する関数
def forecast_with_timesfm(model, ma_values, chunk_size=context_len):
    predictions = []
    for start in range(0, len(ma_values), chunk_size):
        end = start + chunk_size
        context_chunk = ma_values[start:end].reshape(1, -1).astype(np.float32)

        if len(context_chunk[0]) < chunk_size:
            # パディングを追加してコンテキストの長さをchunk_sizeにする
            context_chunk = np.pad(context_chunk, ((0, 0), (0, chunk_size - len(context_chunk[0]))), 'constant', constant_values=0)

        frequency_input = [0]  # 高頻度データとして扱う

        # 予測の実行
        point_forecast, experimental_quantile_forecast = model.forecast(
            inputs=context_chunk,
            freq=frequency_input,
        )

        predictions.extend(point_forecast[0])

        # ガベージコレクションを実行してメモリを解放
        del context_chunk
        gc.collect()

    return predictions

# TimesFMで予測を実行
predictions = forecast_with_timesfm(tfm, ma_values)

print("予測されたdigit1の次の値:", predictions)

# 実測値と予測値をプロット
# 実測値と予測値の長さを揃える
actual_ma_values = ma_values[:len(predictions)]

# 結果をDataFrameに格納
result_df = pd.DataFrame({
    'Index': range(len(actual_ma_values)),
    'Actual': actual_ma_values,
    'Predicted': predictions
})

# Plotlyでプロット
import plotly.graph_objs as go

fig = go.Figure()

# 実測値を追加
fig.add_trace(go.Scatter(
    x=result_df['Index'],
    y=result_df['Actual'],
    mode='lines',
    name='Actual Values'
))

# 予測値を追加
fig.add_trace(go.Scatter(
    x=result_df['Index'],
    y=result_df['Predicted'],
    mode='lines',
    name='Predicted Values',
    line=dict(dash='dash')
))

# レイアウトを更新
fig.update_layout(
    title='Actual vs Predicted Moving Average Values for digit1',
    xaxis_title='Index',
    yaxis_title='digit1 Moving Average Values',
    legend_title='Legend',
    template='plotly_white'
)

# プロットを表示
fig.show()


Multiprocessing context has already been set.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Constructing model weights.




Constructed model weights in 3.62 seconds.
Restoring checkpoint from /root/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 0.89 seconds.
Jitting decoding.
Jitted decoding in 18.39 seconds.
予測されたdigit1の次の値: [3.0032272, 5.4502115, 4.675059, 3.9418623, 3.3823328, 4.489526, 3.8813527, 5.245159, 4.0363116, 3.5580382, 5.472446, 4.691943, 0.0]


In [54]:
fig.show()

In [46]:
df

Unnamed: 0,part,date,week,eto,抽選数字,digit1,digit2,digit3,digit4,digit1_ma
0,1,1994-10-07 00:00:00,金,大安,1149,1,1,4,9,1
1,2,1994-10-14 00:00:00,金,赤口,7921,7,9,2,1,8
2,3,1994-10-21 00:00:00,金,先勝,0097,0,0,9,7,8
3,4,1994-10-28 00:00:00,金,友引,0171,0,1,7,1,8
4,5,1994-11-04 00:00:00,金,大安,0265,0,2,6,5,8
...,...,...,...,...,...,...,...,...,...,...
6502,6503,2024-07-02 00:00:00,火,先勝,5240,5,2,4,0,29408
6503,6504,2024-07-03 00:00:00,水,友引,4896,4,8,9,6,29412
6504,6505,2024-07-04 00:00:00,木,先負,4365,4,3,6,5,29416
6505,6506,2024-07-05 00:00:00,金,仏滅,6715,6,7,1,5,29422
