# STATSMODELS

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as sps
import statsmodels.stats as sms
import statsmodels.api as sm

statsmodels имеет много модулей, например:
1. Модели линейной регрессии. OLS, WLS - метод наименьших квадратов для линейной регрессии; обычный, взвешенный;
2. Модели для временных рядов. AR, ARIMA, SARIMAX - авторегрессионные модели, в т.ч. со скользящей средней и с учетом сезонности;
3. Модели для классификации. Logit и Probit — модели для бинарной классификации, Multinomial Logit — многоклассовой;
4. Многомерные модели (MANOVA, факторный анализ);
5. Непараметрические методы;
6. Диагностика и тестирование моделей (тесты на нормальность, гетероскедастичность, автокорреляцию);
7. Инструменты для визуализации (QQ-plot, диаграммы с остатками);
8. Распределения;
9. Оптимизационные задачи;
10. Датасеты, формулы...

In [7]:
df = sm.datasets.get_rdataset("mtcars").data
df.head()

Unnamed: 0_level_0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


## Обучение моделей

In [8]:
y = df['mpg']
X = df.iloc[:, 1:]

In [10]:
mod = sm.OLS(y, X)

In [11]:
res = mod.fit()

In [14]:
print(res.summary())

                                 OLS Regression Results                                
Dep. Variable:                    mpg   R-squared (uncentered):                   0.989
Model:                            OLS   Adj. R-squared (uncentered):              0.984
Method:                 Least Squares   F-statistic:                              203.0
Date:                Sat, 22 Feb 2025   Prob (F-statistic):                    2.82e-19
Time:                        14:12:26   Log-Likelihood:                         -70.181
No. Observations:                  32   AIC:                                      160.4
Df Residuals:                      22   BIC:                                      175.0
Df Model:                          10                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

## Статтесты

statsmodels.stats.weightstats.ttest_ind( 
    x1, 
    x2, 
    alternative='two-sided', 
    usevar='pooled', 
    weights=(None, None), 
    value=0 
) -> (tstat, pvalue, df - degrees of freedom)

statsmodels.stats.proportion.proportions_ztest(
    count, 
    nobs, 
    value=None, 
    alternative='two-sided', 
    prop_var=False
)

statsmodels.stats.descriptivestats.describe(df)

In [17]:
sms.weightstats.ttest_ind(df['mpg'], df['cyl'])

(12.511633461013535, 1.295448872213735e-18, 62.0)

In [21]:
sms.descriptivestats.describe(df)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
nobs,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0
missing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,20.090625,6.1875,230.721875,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,2.8125
std_err,1.065424,0.315709,21.909473,12.120317,0.094519,0.172968,0.31589,0.089098,0.08821,0.130427,0.28553
upper_ci,22.178818,6.806279,273.663652,170.442885,3.781816,3.556262,18.467883,0.612129,0.579138,3.943131,3.372128
lower_ci,18.002432,5.568721,187.780098,122.932115,3.411309,2.878238,17.229617,0.262871,0.233362,3.431869,2.252872
std,6.026948,1.785922,123.938694,68.562868,0.534679,0.978457,1.786943,0.504016,0.498991,0.737804,1.6152
iqr,7.375,4.0,205.175,83.5,0.84,1.02875,2.0075,1.0,1.0,1.0,2.0
iqr_normal,5.467096,2.965204,152.096455,61.898643,0.622693,0.762614,1.488162,0.741301,0.741301,0.741301,1.482602
mad,4.714453,1.585938,108.785742,56.480469,0.453242,0.730187,1.376172,0.492188,0.482422,0.644531,1.300781
