In [6]:
# Mount Google Drive into the Colab runtime at /content/drive; the data file
# loaded later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [133]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
from pathlib import Path

# Location of the cleaned player-statistics workbook on the mounted Drive.
path_base = Path("/content/drive/MyDrive/Colab Notebooks/麻雀")
filepath_xlsx = path_base.joinpath("df_rework_test_clean.xlsx")

# Columns used by the analysis, in the order they should appear in the frame.
columns_used = [
    "記録対戦数", "和了巡数", "平均和了", "平均放銃", "和了率", "放銃率", "ツモ率",
    "ダマ率", "流局率", "流局聴牌率", "副露率", "立直率", "飛び率", "平均順位",
]

# Select with .loc instead of reindex(): reindex() silently creates an all-NaN
# column for any name missing from the workbook, which would only surface later
# as a broken regression. .loc raises KeyError immediately on a missing column.
df = pd.read_excel(filepath_xlsx, index_col=0).loc[:, columns_used]
display(df.shape)
display(df.head(3))

(137, 14)

Unnamed: 0,記録対戦数,和了巡数,平均和了,平均放銃,和了率,放銃率,ツモ率,ダマ率,流局率,流局聴牌率,副露率,立直率,飛び率,平均順位
[聖3] 【腹ペコ】,340,12.214,6576,4913,0.2454,0.1178,0.3759,0.13,0.1268,0.508,0.3284,0.2005,0.0441,2.176
[聖2] まなブリオン,375,12.307,6874,5489,0.2474,0.1478,0.372,0.0899,0.1155,0.5839,0.3376,0.2169,0.0773,2.328
[魂1] noprops,600,12.1,6519,5624,0.2432,0.103,0.3695,0.0701,0.1326,0.4618,0.3814,0.2164,0.06,2.245


In [138]:
def mult_LinearRegression(df, aim="平均順位"):
  """Fit an OLS multiple regression on standardized data and print its summary.

  Every column of ``df`` except ``aim`` is used as an explanatory variable.
  Both the explanatory variables and the target are z-scored with pandas'
  ``mean()`` / ``std()`` before fitting, so the reported coefficients are
  standardized partial regression coefficients.

  Args:
    df: DataFrame containing the explanatory columns and the ``aim`` column.
      Assumes every column is numeric -- TODO confirm against the data source.
    aim: Name of the target (dependent-variable) column.

  Returns:
    The fitted statsmodels results object (the value of ``model.fit()``), so
    callers can inspect it programmatically instead of reading the printed
    table. When the call is the last expression of a notebook cell, assign the
    result or end the line with ``;`` to avoid echoing its repr.

  Raises:
    KeyError: if ``aim`` is not a column of ``df`` (raised by ``df.drop``).
    ValueError: if any column has a zero or undefined standard deviation and
      therefore cannot be standardized.
  """
  x = df.drop([aim], axis=1)
  y = df[aim]

  x_std = x.std()
  y_std = y.std()

  # A constant column would turn into NaN (0/0) during standardization and make
  # the fit meaningless, so fail early and name the offending columns.
  unscalable = list(x_std.index[~(x_std > 0)])
  if not y_std > 0:
    unscalable.append(aim)
  if unscalable:
    raise ValueError(
        "cannot standardize columns with zero or undefined standard deviation: "
        f"{unscalable}"
    )

  # Standardize (z-score) both sides.
  xss_pd = (x - x.mean()) / x_std
  yss_pd = (y - y.mean()) / y_std

  # Build the model with an explicit intercept term.
  model = sm.OLS(yss_pd, sm.add_constant(xss_pd))
  # Fit the model.
  results = model.fit()
  # Show the detailed results table.
  print(results.summary())
  return results


## 平均順位は何が影響してるのか？

In [140]:
# Regress 平均順位 (average rank) on every other column of df.
mult_LinearRegression(df, "平均順位")

                            OLS Regression Results                            
Dep. Variable:                   平均順位   R-squared:                       0.847
Model:                            OLS   Adj. R-squared:                  0.831
Method:                 Least Squares   F-statistic:                     52.38
Date:                Wed, 01 Nov 2023   Prob (F-statistic):           9.35e-44
Time:                        06:07:34   Log-Likelihood:                -65.297
No. Observations:                 137   AIC:                             158.6
Df Residuals:                     123   BIC:                             199.5
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -1.498e-15      0.035  -4.26e-14      1.0

・平均順位の値は低ければ低いほど良いため、coef（標準化偏回帰係数）が負で-1に近いほど平均順位に良い影響を与えると定義する

・偏回帰係数を見ると、上位3つの和了率・平均和了・飛び率が平均順位の向上に寄与している。

・逆に、下位3つの放銃率・ダマ率・立直率は平均順位を悪化させている。

・切片がe-15（10の-15乗）程度とほぼ0なのは、説明変数・目的変数を標準化した影響によるもの

・決定係数が約0.85（自由度調整済みで約0.83）とかなり高く、今回使用した重回帰モデルで平均順位を上げるために重要な要素を説明できる可能性が高い

## 何が和了率に一番影響をおよぼしているのか？

In [136]:
# Table for the 和了率 (win rate) analysis: all of df except the 平均順位
# (average rank) column. Show its shape and first rows as a sanity check.
df2 = df.drop(columns=["平均順位"])
display(df2.shape, df2.head())

(137, 13)

Unnamed: 0,記録対戦数,和了巡数,平均和了,平均放銃,和了率,放銃率,ツモ率,ダマ率,流局率,流局聴牌率,副露率,立直率,飛び率
[聖3] 【腹ペコ】,340,12.214,6576,4913,0.2454,0.1178,0.3759,0.13,0.1268,0.508,0.3284,0.2005,0.0441
[聖2] まなブリオン,375,12.307,6874,5489,0.2474,0.1478,0.372,0.0899,0.1155,0.5839,0.3376,0.2169,0.0773
[魂1] noprops,600,12.1,6519,5624,0.2432,0.103,0.3695,0.0701,0.1326,0.4618,0.3814,0.2164,0.06
[聖3] 富田美咲,377,12.033,6502,5038,0.2516,0.1115,0.353,0.1066,0.1339,0.4942,0.3633,0.1826,0.0398
[聖3] まさちき。,476,12.277,6821,5464,0.2392,0.121,0.364,0.0743,0.1319,0.4548,0.3863,0.1846,0.0441


In [141]:
# Regress 和了率 (win rate) on the remaining columns of df2, which no longer
# contains 平均順位 (average rank).
mult_LinearRegression(df2, "和了率")

                            OLS Regression Results                            
Dep. Variable:                    和了率   R-squared:                       0.774
Model:                            OLS   Adj. R-squared:                  0.752
Method:                 Least Squares   F-statistic:                     35.42
Date:                Wed, 01 Nov 2023   Prob (F-statistic):           2.36e-34
Time:                        06:07:42   Log-Likelihood:                -91.967
No. Observations:                 137   AIC:                             209.9
Df Residuals:                     124   BIC:                             247.9
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1.407e-15      0.043   3.31e-14      1.0

## 立直

In [145]:
# Regress 立直率 (riichi rate) on every other column except 和了率 (win rate).
# NOTE(review): df3 is derived from df rather than df2, so 平均順位 (average rank)
# stays in as an explanatory variable here, unlike the 和了率 analysis where it
# was removed first -- confirm this is intended.
df3 = df.drop(["和了率"], axis=1)
mult_LinearRegression(df3, "立直率")

                            OLS Regression Results                            
Dep. Variable:                    立直率   R-squared:                       0.793
Model:                            OLS   Adj. R-squared:                  0.773
Method:                 Least Squares   F-statistic:                     39.64
Date:                Wed, 01 Nov 2023   Prob (F-statistic):           1.13e-36
Time:                        06:22:49   Log-Likelihood:                -85.931
No. Observations:                 137   AIC:                             197.9
Df Residuals:                     124   BIC:                             235.8
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -1.446e-16      0.041  -3.55e-15      1.0