In [24]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt

In [35]:
# データセットの設定
n_schools = 5  # 学校の数
n_teachers_per_school = 3  # 各学校における教師の数
n_classes_per_teacher = 2  # 各教師におけるクラスの数
n_students_per_class = 30  # 各クラスにおける学生の数

# ランダムデータ生成
np.random.seed(0)

# 学校、教師、クラスの効果をランダムに生成
school_effects = np.random.normal(0, 5, n_schools)
teacher_effects = np.random.normal(0, 3, n_schools * n_teachers_per_school)
class_effects = np.random.normal(0, 2, n_schools * n_teachers_per_school * n_classes_per_teacher)

# データフレームの生成
data = []
for school_id in range(n_schools):
    for teacher_id in range(n_teachers_per_school):
        for class_id in range(n_classes_per_teacher):
            for student_id in range(n_students_per_class):
                # 学校、教師、クラスの効果を組み合わせて学生の成績を計算
                score = (
                    50 + school_effects[school_id] +
                    teacher_effects[school_id * n_teachers_per_school + teacher_id] +
                    class_effects[school_id * n_teachers_per_school * n_classes_per_teacher + 
                                  teacher_id * n_classes_per_teacher + class_id] +
                    np.random.normal(0, 10)
                )
                data.append({
                    "StudentID": f"S_{school_id}_{teacher_id}_{class_id}_{student_id}",
                    "Score": max(0, min(100, score)),  # 成績を0〜100の範囲に制限
                    "ClassID": f"C_{school_id}_{teacher_id}_{class_id}",
                    "TeacherID": f"T_{school_id}_{teacher_id}",
                    "SchoolID": f"School_{school_id}"
                })

df = pd.DataFrame(data)
print(df.head())
print(df.info())
for _, r in df.iterrows():
    print(r)

   StudentID      Score  ClassID TeacherID  SchoolID
0  S_0_0_0_0  41.827783  C_0_0_0     T_0_0  School_0
1  S_0_0_0_1  54.651473  C_0_0_0     T_0_0  School_0
2  S_0_0_0_2  45.674397  C_0_0_0     T_0_0  School_0
3  S_0_0_0_3  38.976127  C_0_0_0     T_0_0  School_0
4  S_0_0_0_4  50.500626  C_0_0_0     T_0_0  School_0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   StudentID  900 non-null    object 
 1   Score      900 non-null    float64
 2   ClassID    900 non-null    object 
 3   TeacherID  900 non-null    object 
 4   SchoolID   900 non-null    object 
dtypes: float64(1), object(4)
memory usage: 35.3+ KB
None
StudentID    S_0_0_0_0
Score        41.827783
ClassID        C_0_0_0
TeacherID        T_0_0
SchoolID      School_0
Name: 0, dtype: object
StudentID    S_0_0_0_1
Score        54.651473
ClassID        C_0_0_0
TeacherID        T_0_0
SchoolID  

In [37]:
n_classes=df['ClassID'].nunique()
n_teachers=df['TeacherID'].nunique()
n_schools=df['SchoolID'].nunique()
n_students=df['StudentID'].nunique()


In [40]:
# モデルの定義

# データセットの設定
# n_schools = 5  # 学校の数
# n_teachers_per_school = 3  # 各学校における教師の数
# n_classes_per_teacher = 2  # 各教師におけるクラスの数
# n_students_per_class = 30  # 各クラスにおける学生の数


# 入力データとしてクラスごとの平均を取るべきか
class BeyesModel(pm.Model):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        with self:
            self.add_coord(name="school", values=[f"School_{i}" for i in range(n_schools)])
            self.add_coord(name="teacher", values=[f"T_{i}_{j}" for j in range(n_teachers) for i in range(n_schools)])
            self.add_coord(name="class", values=[f"C_{i}_{j}_{k}" for k in range(n_classes) for j in range(n_teachers) for i in range(n_schools)])
            
            school_mean = pm.Normal(name='school_mean', mu=50, sigma=10, dims="school")
            teacher_mean = pm.Normal(name='teacher_mean', mu=0, sigma=10)
            class_sigma = pm.HalfCauchy(name='class_sigma', beta=5, dims="class")
            
            pm.Normal(name='class_score', mu=school_mean[int(df["SchoolID"][-1])] + teacher_mean[int(df["SchoolID"][-1])*n_teachers_per_school+int(data["TeacherID"][-3])], sigma=class_sigma, dims="class")
            
            
model = BeyesModel()



KeyError: -1