In [306]:
import pandas as pd
import wbgapi as wb
import numpy as np
import statsmodels.api as sm
import yfinance as yf

import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.formula.api import logit
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet

# Q1

In [307]:
Literacy=wb.data.DataFrame('SE.ADT.LITR.FE.ZS','TUR')
Labor=wb.data.DataFrame('SL.TLF.TOTL.FE.ZS','TUR')
Poverty=wb.data.DataFrame('SI.POV.NAHC','TUR')
health=wb.data.DataFrame('SH.XPD.CHEX.PC.CD','TUR')
GDP=wb.data.DataFrame('NY.GDP.PCAP.CD','TUR')
Mortality=wb.data.DataFrame('SH.DYN.MORT','TUR')

In [308]:
df=pd.concat([Mortality.transpose(),Literacy.transpose(),Labor.transpose(),Poverty.transpose(),health.transpose(),GDP.transpose()],axis=1)

In [309]:
df.columns=['Mortality','Literacy','Labor','Poverty','health','GDP']

In [310]:
model = ols('Mortality ~ Literacy + Labor + Poverty + health + GDP', data=df).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:              Mortality   R-squared:                       0.992
Model:                            OLS   Adj. R-squared:                  0.986
Method:                 Least Squares   F-statistic:                     169.6
Date:                Mon, 07 Nov 2022   Prob (F-statistic):           3.81e-07
Time:                        23:50:39   Log-Likelihood:                -7.8492
No. Observations:                  13   AIC:                             27.70
Df Residuals:                       7   BIC:                             31.09
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     69.8438      9.987      6.994      0.0



Although literacy is the best variable in explaining the model according to sum suquare, we cannot be sure of the sign of the variable when we look at the 95% confidence interval.

In [311]:
sm.stats.anova_lm(model)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Literacy,1.0,296.579278,296.579278,815.327082,1.661736e-08
Labor,1.0,8.414644,8.414644,23.132725,0.001944516
Poverty,1.0,0.940111,0.940111,2.584462,0.1519524
health,1.0,1.829951,1.829951,5.030724,0.05981897
GDP,1.0,0.769732,0.769732,2.116072,0.1890863
Residual,7.0,2.546285,0.363755,,


In [312]:
model = ols('Mortality ~ Literacy * Labor ', data=df).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:              Mortality   R-squared:                       0.991
Model:                            OLS   Adj. R-squared:                  0.988
Method:                 Least Squares   F-statistic:                     392.7
Date:                Mon, 07 Nov 2022   Prob (F-statistic):           1.83e-11
Time:                        23:50:41   Log-Likelihood:                -26.882
No. Observations:                  15   AIC:                             61.76
Df Residuals:                      11   BIC:                             64.60
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept       -288.5163    107.135     -2.



Literacy looks okey but residual is higher than other model. So we might need other variables too.

In [313]:
sm.stats.anova_lm(model)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Literacy,1.0,2724.493944,2724.493944,947.190065,5.048734e-12
Labor,1.0,616.218427,616.218427,214.232802,1.474837e-08
Literacy:Labor,1.0,47.884604,47.884604,16.64743,0.001819812
Residual,11.0,31.640359,2.876396,,


In [314]:
model = ols('Mortality ~ Literacy * Labor * health ', data=df).fit()
print(model.summary())
sm.stats.anova_lm(model)

                            OLS Regression Results                            
Dep. Variable:              Mortality   R-squared:                       0.998
Model:                            OLS   Adj. R-squared:                  0.996
Method:                 Least Squares   F-statistic:                     426.1
Date:                Mon, 07 Nov 2022   Prob (F-statistic):           1.16e-07
Time:                        23:50:44   Log-Likelihood:               -0.39099
No. Observations:                  14   AIC:                             16.78
Df Residuals:                       6   BIC:                             21.89
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept              1075.47



Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Literacy,1.0,412.311533,412.311533,2854.074537,2.887445e-09
Labor,1.0,4.376534,4.376534,30.294945,0.001509113
Literacy:Labor,1.0,0.842607,0.842607,5.832633,0.05221123
health,1.0,8.099188,8.099188,56.063643,0.0002932256
Literacy:health,1.0,4.604292,4.604292,31.871515,0.001324424
Labor:health,1.0,0.267232,0.267232,1.849812,0.2226863
Literacy:Labor:health,1.0,0.388972,0.388972,2.692515,0.15193
Residual,6.0,0.866785,0.144464,,


## Q1 result

R2 score is pretty high. Variables explain model very good and residual is small enough. According to %95 confidence intervals variables signs are not stable. But i didn't want to remove the variables which describe the model. I just tried to stabilize literacy which is the most important variable that explains model.

In [315]:
model = ols('Mortality ~ Literacy * Labor + health ', data=df).fit()
print(model.summary())
sm.stats.anova_lm(model)

                            OLS Regression Results                            
Dep. Variable:              Mortality   R-squared:                       0.986
Model:                            OLS   Adj. R-squared:                  0.980
Method:                 Least Squares   F-statistic:                     156.3
Date:                Mon, 07 Nov 2022   Prob (F-statistic):           2.63e-08
Time:                        23:50:46   Log-Likelihood:                -14.081
No. Observations:                  14   AIC:                             38.16
Df Residuals:                       9   BIC:                             41.36
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept        -22.9930    132.626     -0.



Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Literacy,1.0,412.311533,412.311533,605.62,1.447968e-09
Labor,1.0,4.376534,4.376534,6.428432,0.03194917
Literacy:Labor,1.0,0.842607,0.842607,1.237655,0.2947548
health,1.0,8.099188,8.099188,11.896418,0.007284845
Residual,9.0,6.127281,0.680809,,


# Q2

In [316]:
Gold = yf.download('GC=F')
Silver = yf.download('SI=F')
Copper = yf.download('HG=F')
Platinum = yf.download('PL=F')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [317]:
Gold_close=Gold['Close']
Silver_open=Silver['Open']
Copper_open=Copper['Open']
Platinum_open=Platinum['Open']

In [318]:
df=pd.concat([Gold_close, Silver_open,Copper_open,Platinum_open], axis=1).dropna()
df.columns=['Gold_close','Silver_open','Copper_open','Platinum_open']

In [319]:
df

Unnamed: 0_level_0,Gold_close,Silver_open,Copper_open,Platinum_open
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-08-30 00:00:00-04:00,273.899994,4.950000,0.8790,593.900024
2000-08-31 00:00:00-04:00,278.299988,4.920000,0.8850,589.000000
2000-09-01 00:00:00-04:00,277.000000,5.035000,0.8780,588.000000
2000-09-05 00:00:00-04:00,275.799988,4.990000,0.8960,602.000000
2000-09-06 00:00:00-04:00,274.200012,5.000000,0.9050,603.000000
...,...,...,...,...
2022-11-01 00:00:00-04:00,1645.000000,19.125000,3.4945,959.799988
2022-11-02 00:00:00-04:00,1645.699951,19.780001,3.4985,960.200012
2022-11-03 00:00:00-04:00,1627.300049,19.235001,3.4455,933.400024
2022-11-04 00:00:00-04:00,1672.500000,19.980000,3.6370,969.799988


In [320]:
model = ols('Gold_close ~ Silver_open * Copper_open * Platinum_open', data=df).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             Gold_close   R-squared:                       0.957
Model:                            OLS   Adj. R-squared:                  0.957
Method:                 Least Squares   F-statistic:                 1.529e+04
Date:                Mon, 07 Nov 2022   Prob (F-statistic):               0.00
Time:                        23:50:50   Log-Likelihood:                -29674.
No. Observations:                4865   AIC:                         5.936e+04
Df Residuals:                    4857   BIC:                         5.942e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                                            coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------

In [321]:
sm.stats.anova_lm(model)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Silver_open,1.0,985618300.0,985618300.0,84636.959645,0.0
Copper_open,1.0,36393210.0,36393210.0,3125.155743,0.0
Silver_open:Copper_open,1.0,70493760.0,70493760.0,6053.435763,0.0
Platinum_open,1.0,131551700.0,131551700.0,11296.598232,0.0
Silver_open:Platinum_open,1.0,17071170.0,17071170.0,1465.934747,1.547806e-280
Copper_open:Platinum_open,1.0,3916590.0,3916590.0,336.325187,1.089672e-72
Silver_open:Copper_open:Platinum_open,1.0,1213660.0,1213660.0,104.219369,3.176283e-24
Residual,4857.0,56560970.0,11645.25,,


### Silver closing price

In [322]:
Silver_close=Silver['Close']
Gold_open=Gold['Open']
Copper_open=Copper['Open']
Platinum_open=Platinum['Open']
df=pd.concat([Silver_close, Gold_open,Copper_open,Platinum_open], axis=1).dropna()
df.columns=['Silver_close','Gold_open','Copper_open','Platinum_open']

In [323]:
model = ols('Silver_close ~ Gold_open * Copper_open * Platinum_open', data=df).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           Silver_close   R-squared:                       0.972
Model:                            OLS   Adj. R-squared:                  0.972
Method:                 Least Squares   F-statistic:                 2.431e+04
Date:                Mon, 07 Nov 2022   Prob (F-statistic):               0.00
Time:                        23:50:58   Log-Likelihood:                -8683.6
No. Observations:                4865   AIC:                         1.738e+04
Df Residuals:                    4857   BIC:                         1.744e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                                          coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------------
In

In [324]:
sm.stats.anova_lm(model)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Gold_open,1.0,275713.050883,275713.050883,132390.617583,0.0
Copper_open,1.0,20673.529903,20673.529903,9926.919973,0.0
Gold_open:Copper_open,1.0,3740.642554,3740.642554,1796.164441,0.0
Platinum_open,1.0,38477.292015,38477.292015,18475.84812,0.0
Gold_open:Platinum_open,1.0,11323.812098,11323.812098,5437.41572,0.0
Copper_open:Platinum_open,1.0,104.354998,104.354998,50.1087,1.662265e-12
Gold_open:Copper_open:Platinum_open,1.0,4399.749779,4399.749779,2112.6515,0.0
Residual,4857.0,10115.054319,2.082572,,


### Copper closing price

In [325]:
Copper_close=Copper['Close']
Gold_open=Gold['Open']
Silver_open=Silver['Open']
Platinum_open=Platinum['Open']
df=pd.concat([Copper_close, Gold_open,Copper_open,Platinum_open], axis=1).dropna()
df.columns=['Copper_close','Gold_open','Silver_open','Platinum_open']

In [326]:
model = ols('Copper_close ~ Gold_open * Silver_open * Platinum_open', data=df).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           Copper_close   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 5.903e+05
Date:                Mon, 07 Nov 2022   Prob (F-statistic):               0.00
Time:                        23:51:01   Log-Likelihood:                 8991.4
No. Observations:                4865   AIC:                        -1.797e+04
Df Residuals:                    4857   BIC:                        -1.791e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                                          coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------------
In

In [327]:
sm.stats.anova_lm(model)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Gold_open,1.0,4140.915989,4140.915989,2845553.0,0.0
Silver_open,1.0,1872.428729,1872.428729,1286695.0,0.0
Gold_open:Silver_open,1.0,0.005467,0.005467,3.756896,0.052648
Platinum_open,1.0,0.00027,0.00027,0.1855658,0.666652
Gold_open:Platinum_open,1.0,0.018405,0.018405,12.64759,0.00038
Silver_open:Platinum_open,1.0,0.000624,0.000624,0.4288173,0.512601
Gold_open:Silver_open:Platinum_open,1.0,0.000624,0.000624,0.4285132,0.51275
Residual,4857.0,7.06802,0.001455,,


### Platinum closing price

In [328]:
Platinum_close=Platinum['Close']
Gold_open=Gold['Open']
Silver_open=Silver['Open']
Copper_open=Copper['Open']
df=pd.concat([Platinum_close, Gold_open,Copper_open,Silver_open], axis=1).dropna()
df.columns=['Platinum_close','Gold_open','Copper_open','Silver_open']

In [329]:
model = ols('Platinum_close ~ Gold_open * Silver_open * Copper_open', data=df).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:         Platinum_close   R-squared:                       0.871
Model:                            OLS   Adj. R-squared:                  0.871
Method:                 Least Squares   F-statistic:                     4695.
Date:                Mon, 07 Nov 2022   Prob (F-statistic):               0.00
Time:                        23:51:03   Log-Likelihood:                -30636.
No. Observations:                4865   AIC:                         6.129e+04
Df Residuals:                    4857   BIC:                         6.134e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [330]:
sm.stats.anova_lm(model)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Gold_open,1.0,163620500.0,163620500.0,9461.506034,0.0
Silver_open,1.0,283368300.0,283368300.0,16386.033275,0.0
Gold_open:Silver_open,1.0,95759000.0,95759000.0,5537.351541,0.0
Copper_open,1.0,14277270.0,14277270.0,825.596345,8.037404999999999e-168
Gold_open:Copper_open,1.0,1111405.0,1111405.0,64.267993,1.349494e-15
Silver_open:Copper_open,1.0,6507264.0,6507264.0,376.288475,8.508546e-81
Gold_open:Silver_open:Copper_open,1.0,3677464.0,3677464.0,212.652718,3.554063e-47
Residual,4857.0,83993490.0,17293.29,,


## Result

The model in which we examined the closing price of copper is seen as the best. 𝑅2 score higher than others. The difference between the coefficients of the independent variables according to the 95% confidence intervals is not much and they have the same signs.

# Q3

In [331]:
def get_ticker(data):
    lst=[]
    data1 = yf.download(data)
    data1=data1[['Open','Close']]
    for i in data1.index:
        if data1.loc[i]['Close']>data1.loc[i]['Open']:
            lst.append(1)
        else:
            lst.append(0)
    data1['result']=lst
    return data1

In [332]:
Gold=get_ticker('GC=F')
silver_open=get_ticker('SI=F')['Open']
copper_open=get_ticker('HG=F')['Open']
platinum_open=get_ticker('PL=F')['Open']

[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['result']=lst


[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['result']=lst


[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['result']=lst


[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['result']=lst


In [333]:
df3=pd.concat([Gold, Silver_open,copper_open,platinum_open], axis=1).dropna()
df3.columns=['Gold_open','Gold_close','Gold_result','Silver_open','Copper_open','Platinum_open']

In [334]:
df3

Unnamed: 0_level_0,Gold_open,Gold_close,Gold_result,Silver_open,Copper_open,Platinum_open
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-08-30 00:00:00-04:00,273.899994,273.899994,0.0,4.950000,0.8790,593.900024
2000-08-31 00:00:00-04:00,274.799988,278.299988,1.0,4.920000,0.8850,589.000000
2000-09-01 00:00:00-04:00,277.000000,277.000000,0.0,5.035000,0.8780,588.000000
2000-09-05 00:00:00-04:00,275.799988,275.799988,0.0,4.990000,0.8960,602.000000
2000-09-06 00:00:00-04:00,274.200012,274.200012,0.0,5.000000,0.9050,603.000000
...,...,...,...,...,...,...
2022-11-01 00:00:00-04:00,1630.800049,1645.000000,1.0,19.125000,3.4945,959.799988
2022-11-02 00:00:00-04:00,1650.800049,1645.699951,0.0,19.780001,3.4985,960.200012
2022-11-03 00:00:00-04:00,1629.199951,1627.300049,0.0,19.235001,3.4455,933.400024
2022-11-04 00:00:00-04:00,1630.199951,1672.500000,1.0,19.980000,3.6370,969.799988


In [335]:
model = logit('Gold_result ~ Gold_open * Silver_open * Copper_open * Platinum_open', data=df3).fit()
model.summary()

Optimization terminated successfully.
         Current function value: 0.666791
         Iterations 5


0,1,2,3
Dep. Variable:,Gold_result,No. Observations:,4865.0
Model:,Logit,Df Residuals:,4849.0
Method:,MLE,Df Model:,15.0
Date:,"Mon, 07 Nov 2022",Pseudo R-squ.:,0.02221
Time:,23:51:14,Log-Likelihood:,-3243.9
converged:,True,LL-Null:,-3317.6
Covariance Type:,nonrobust,LLR p-value:,7.91e-24

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-2.6255,1.317,-1.993,0.046,-5.208,-0.043
Gold_open,0.0090,0.003,3.166,0.002,0.003,0.015
Silver_open,-0.3046,0.385,-0.790,0.429,-1.060,0.451
Gold_open:Silver_open,6.458e-05,0.000,0.273,0.785,-0.000,0.001
Copper_open,-1.9343,1.060,-1.825,0.068,-4.011,0.143
Gold_open:Copper_open,-0.0009,0.001,-0.850,0.395,-0.003,0.001
Silver_open:Copper_open,0.2034,0.107,1.893,0.058,-0.007,0.414
Gold_open:Silver_open:Copper_open,-6.899e-05,6.72e-05,-1.026,0.305,-0.000,6.28e-05
Platinum_open,0.0035,0.002,1.906,0.057,-9.88e-05,0.007


### Silver closing price

In [336]:
Gold_open=Gold['Open']
silver=get_ticker('SI=F')

[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['result']=lst


In [337]:
df3=pd.concat([silver, Gold_open,copper_open,platinum_open], axis=1).dropna()
df3.columns=['silver_open','silver_close','silver_result','Gold_open','Copper_open','Platinum_open']

In [338]:
model = logit('silver_result ~ Gold_open * silver_open * Copper_open * Platinum_open', data=df3).fit()
model.summary()

Optimization terminated successfully.
         Current function value: 0.627076
         Iterations 5


0,1,2,3
Dep. Variable:,silver_result,No. Observations:,4865.0
Model:,Logit,Df Residuals:,4849.0
Method:,MLE,Df Model:,15.0
Date:,"Mon, 07 Nov 2022",Pseudo R-squ.:,0.01987
Time:,23:51:20,Log-Likelihood:,-3050.7
converged:,True,LL-Null:,-3112.6
Covariance Type:,nonrobust,LLR p-value:,3.652e-19

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-5.0543,1.380,-3.663,0.000,-7.759,-2.350
Gold_open,0.0082,0.003,2.823,0.005,0.002,0.014
silver_open,0.2209,0.402,0.550,0.582,-0.566,1.008
Gold_open:silver_open,-0.0002,0.000,-0.999,0.318,-0.001,0.000
Copper_open,1.9374,1.101,1.760,0.078,-0.220,4.095
Gold_open:Copper_open,-0.0024,0.001,-2.169,0.030,-0.005,-0.000
silver_open:Copper_open,-0.1012,0.113,-0.899,0.369,-0.322,0.119
Gold_open:silver_open:Copper_open,8.044e-05,6.99e-05,1.151,0.250,-5.66e-05,0.000
Platinum_open,0.0041,0.002,2.146,0.032,0.000,0.008


### Copper closing price

In [339]:
copper=get_ticker('HG=F')

[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['result']=lst


In [340]:
df3=pd.concat([copper, Gold_open,silver_open,platinum_open], axis=1).dropna()
df3.columns=['copper_open','copper_close','copper_result','Gold_open','silver_open','Platinum_open']

In [341]:
model = logit('copper_result ~ Gold_open * silver_open * copper_open * Platinum_open', data=df3).fit()
model.summary()

Optimization terminated successfully.
         Current function value: 0.687446
         Iterations 4


0,1,2,3
Dep. Variable:,copper_result,No. Observations:,4865.0
Model:,Logit,Df Residuals:,4849.0
Method:,MLE,Df Model:,15.0
Date:,"Mon, 07 Nov 2022",Pseudo R-squ.:,0.00794
Time:,23:51:23,Log-Likelihood:,-3344.4
converged:,True,LL-Null:,-3371.2
Covariance Type:,nonrobust,LLR p-value:,3.141e-06

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-4.3400,1.286,-3.375,0.001,-6.860,-1.819
Gold_open,0.0075,0.003,2.696,0.007,0.002,0.013
silver_open,0.0183,0.381,0.048,0.962,-0.729,0.766
Gold_open:silver_open,-8.315e-05,0.000,-0.354,0.723,-0.001,0.000
copper_open,1.6178,1.032,1.568,0.117,-0.405,3.641
Gold_open:copper_open,-0.0024,0.001,-2.236,0.025,-0.004,-0.000
silver_open:copper_open,-0.0275,0.106,-0.260,0.795,-0.235,0.180
Gold_open:silver_open:copper_open,4.61e-05,6.66e-05,0.692,0.489,-8.44e-05,0.000
Platinum_open,0.0058,0.002,3.294,0.001,0.002,0.009


### Platinum closing price

In [342]:
platinum=get_ticker('PL=F')

[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1['result']=lst


In [343]:
df3=pd.concat([platinum, Gold_open,silver_open,copper_open], axis=1).dropna()
df3.columns=['platinum_open','platinum_close','platinum_result','Gold_open','silver_open','copper_open']

In [344]:
model = logit('platinum_result ~ Gold_open * silver_open * copper_open * platinum_open', data=df3).fit()
model.summary()

Optimization terminated successfully.
         Current function value: 0.526446
         Iterations 6


0,1,2,3
Dep. Variable:,platinum_result,No. Observations:,4865.0
Model:,Logit,Df Residuals:,4849.0
Method:,MLE,Df Model:,15.0
Date:,"Mon, 07 Nov 2022",Pseudo R-squ.:,0.09111
Time:,23:51:26,Log-Likelihood:,-2561.2
converged:,True,LL-Null:,-2817.9
Covariance Type:,nonrobust,LLR p-value:,7.852e-100

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.3828,1.503,-2.251,0.024,-6.329,-0.437
Gold_open,0.0083,0.004,2.306,0.021,0.001,0.015
silver_open,0.4692,0.461,1.017,0.309,-0.435,1.374
Gold_open:silver_open,-0.0007,0.000,-2.461,0.014,-0.001,-0.000
copper_open,0.0773,1.217,0.064,0.949,-2.308,2.463
Gold_open:copper_open,-0.0031,0.001,-2.188,0.029,-0.006,-0.000
silver_open:copper_open,-0.0849,0.124,-0.687,0.492,-0.327,0.157
Gold_open:silver_open:copper_open,0.0002,8.12e-05,2.899,0.004,7.63e-05,0.000
platinum_open,0.0036,0.002,1.850,0.064,-0.000,0.007


##  Q3 Result

when we compare all models, the platinum model I built last is the best. R2 score is higher than others and has more consistant variables. But it is not a good model either. R2 is very low. independent variables fail to explain the model.

# Q4

In [345]:
credit = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/credit-screening/crx.data', header=None)

fn = {'+': 1, '-': 0}

X = credit.replace('?',0).iloc[:,[1,2,7,10,14]]
y = credit.iloc[:,15].map(lambda x: fn.get(x,0))

In [346]:
credit

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,b,30.83,0.000,u,g,w,v,1.25,t,t,1,f,g,00202,0,+
1,a,58.67,4.460,u,g,q,h,3.04,t,t,6,f,g,00043,560,+
2,a,24.50,0.500,u,g,q,h,1.50,t,f,0,f,g,00280,824,+
3,b,27.83,1.540,u,g,w,v,3.75,t,t,5,t,g,00100,3,+
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,00120,0,+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,b,21.08,10.085,y,p,e,h,1.25,f,f,0,f,g,00260,0,-
686,a,22.67,0.750,u,g,c,v,2.00,f,t,2,t,g,00200,394,-
687,a,25.25,13.500,y,p,ff,ff,2.00,f,t,1,t,g,00200,1,-
688,b,17.92,0.205,u,g,aa,v,0.04,f,f,0,f,g,00280,750,-


In [347]:
df4=pd.concat([y,X],axis=1)

In [348]:
df4.columns=['onbes','bir','iki','yedi','on','ondort']

In [349]:
df4.corr()

Unnamed: 0,onbes,iki,yedi,on,ondort
onbes,1.0,0.206294,0.322475,0.40641,0.175657
iki,0.206294,1.0,0.298902,0.271207,0.123121
yedi,0.322475,0.298902,1.0,0.32233,0.051345
on,0.40641,0.271207,0.32233,1.0,0.063692
ondort,0.175657,0.123121,0.051345,0.063692,1.0


In [350]:
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.75)
model = LogisticRegression(max_iter=1500)
model.fit(X_train,y_train)
y_predict = model.predict(X_test)
confusion_matrix(y_test,y_predict)

array([[85, 12],
       [29, 47]], dtype=int64)

In [351]:
def bootstrap(X,y,model):
    res = []
    for i in range(100):
        X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.75)
        model.fit(X_train,y_train)
        res.append(model.score(X_test,y_test))
    tmp = sorted(res)[3:97]
    return (min(tmp),max(tmp))

In [352]:
bootstrap(X,y,model)

(0.7052023121387283, 0.8208092485549133)