# Introduction
This notebook uses ordinary least squares (OLS) regression to look for relationships between the number of instances of the Viennese trichord (VT) in the rows and the normalized frequencies in the movements.

## Setup

In [1]:
import os
import csv
import itertools as it
import statistics
import pandas as pd
import math
import statsmodels.api as sm

  import pandas.util.testing as tm


In [2]:
def _read_csv_tolerating_bad_lines(url):
    """Read the CSV at *url*, warning about and skipping malformed lines.

    ``error_bad_lines=False`` was deprecated in pandas 1.3 and removed in
    pandas 2.0 in favour of ``on_bad_lines``.  The current keyword is tried
    first; older pandas rejects it with a TypeError, in which case the
    legacy keyword is used.  Both spellings skip a malformed line and emit
    a warning for it.
    """
    try:
        return pd.read_csv(url, on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines``
        return pd.read_csv(url, error_bad_lines=False)


# Normalised values and the accompanying metadata, both read from the
# project's GitHub repository.
normalised_data_url = 'https://raw.githubusercontent.com/anonymousmuso/Viennese-Trichord/main/Data/Normalized%20Values.csv'
normalised_data = _read_csv_tolerating_bad_lines(normalised_data_url)
metadata_url = 'https://raw.githubusercontent.com/anonymousmuso/Viennese-Trichord/main/Data/Metadata.csv'
metadata = _read_csv_tolerating_bad_lines(metadata_url)


In [3]:
## this orders the data

# The last metadata column holds each piece's chronological position; it is
# looked up through the metadata index, which carries the same labels as the
# index of normalised_data.  Only the rows that are actually needed are
# converted to float.
chronology = metadata.iloc[:, -1]
chronology = chronology[chronology.index.isin(normalised_data.index)].astype(float)

# Every row of normalised_data needs exactly one metadata row.  Fail loudly
# rather than risk attaching positions to the wrong pieces.
if chronology.index.has_duplicates:
    repeated = sorted(set(chronology.index[chronology.index.duplicated()]))
    raise ValueError(f'metadata has more than one row for: {repeated}')
unmatched = normalised_data.index.difference(chronology.index)
if len(unmatched) > 0:
    raise ValueError(f'metadata has no row for: {list(unmatched)}')

chronological_values = chronology.reindex(normalised_data.index).tolist()
normalised_data['Chronological Position'] = chronological_values

# Sort chronologically, remove the helper column again (dropping it by name
# avoids a double transpose, which can coerce column dtypes), discard the
# first 65 rows of the chronological order and the two Op. 16 movements.
# NOTE(review): 65 is a hard-coded row offset whose meaning is not visible
# here - presumably the point where the row-based works begin; confirm.
normalised_data = (
    normalised_data
    .sort_values(by='Chronological Position')
    .drop(columns='Chronological Position')
    .iloc[65:, :]
    .drop(['Op. 16 v', 'Op. 16 i'])
)

In [5]:
## this adds counts of VT instances in the rows

# [title, count] pairs.  movement_data entries must equal the row label
# exactly; piece_data entries only need to occur somewhere inside the label.
movement_data = [['Op. 17 i', 1], ['Op. 17 ii', 3], ['Op. 17 iii', 2], ['Op. 18 i', 5], ['Op. 18 ii', 1], ['Op. 18 iii', 2]]
piece_data = [['Op. 19', 2], ['Op. 20', 4], ['Op. 21', 2], ['Op. 22', 2], ['Op. 27', 2], ['Op. 31', 1]]

list_of_vt_counts = []
for title in normalised_data.index.values:
    exact_hits = [count for name, count in movement_data if name == title]
    opus_hits = [count for name, count in piece_data if name in str(title)]
    # Movement matches take precedence over piece matches; a row that
    # matches nothing gets a count of 0.
    hits = exact_hits + opus_hits
    list_of_vt_counts.append(hits[0] if hits else 0)
normalised_data['Row VT Count'] = list_of_vt_counts

In [22]:
## this adds topography data
# here dummy variable: 0 = block; 1 = combination; 2 = linear
topography_data = [0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

# One-hot encoding of each code as (block, combined, linear) flags.
one_hot_by_code = {
    0: (1, 0, 0),
    1: (0, 1, 0),
    2: (0, 0, 1),
}
one_hot_rows = [one_hot_by_code[code] for code in topography_data if code in one_hot_by_code]

# Turn the per-row flag triples into one list per dummy variable.
block_topography, combined_topography, linear_topography = (
    list(flags) for flags in zip(*one_hot_rows)
)

# The lists are positional: entry n belongs to row n of normalised_data.
for column_name, flags in (
    ('Block Topography', block_topography),
    ('Combined Topography', combined_topography),
    ('Linear Topography', linear_topography),
):
    normalised_data[column_name] = flags

# To Predict Vertical Normalized Values

In [7]:
# Response: vertical normalised total.  Single predictor: row VT count.
y = normalised_data["Vertical Normalised Total"]
X = normalised_data[["Row VT Count"]]

# sm.OLS takes the response first and the design matrix second.  X has no
# constant column, so the model is fitted without an intercept (the summary
# therefore reports the uncentered R-squared).
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()
predictions = model.predict(X)  # in-sample fitted values

# Last expression of the cell, so the notebook renders the regression table
model.summary()

0,1,2,3
Dep. Variable:,Vertical Normalised Total,R-squared (uncentered):,0.596
Model:,OLS,Adj. R-squared (uncentered):,0.586
Method:,Least Squares,F-statistic:,57.65
Date:,"Tue, 22 Dec 2020",Prob (F-statistic):,3.34e-09
Time:,18:17:53,Log-Likelihood:,2.3838
No. Observations:,40,AIC:,-2.768
Df Residuals:,39,BIC:,-1.079
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Row VT Count,0.1614,0.021,7.593,0.000,0.118,0.204

0,1,2,3
Omnibus:,1.24,Durbin-Watson:,1.811
Prob(Omnibus):,0.538,Jarque-Bera (JB):,0.921
Skew:,0.369,Prob(JB):,0.631
Kurtosis:,2.918,Cond. No.,1.0


# To Predict Linear Normalized Values

In [8]:
# Response: linear normalised total.  Single predictor: row VT count.
y = normalised_data["Linear Normalised Total"]
X = normalised_data[["Row VT Count"]]

# Response first, design matrix second.  No constant column is added to X,
# so this is a regression without an intercept.
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()
predictions = model.predict(X)  # fitted values for the rows used in the fit

# Render the regression table as the cell output
model.summary()

0,1,2,3
Dep. Variable:,Linear Normalised Total,R-squared (uncentered):,0.614
Model:,OLS,Adj. R-squared (uncentered):,0.604
Method:,Least Squares,F-statistic:,62.0
Date:,"Tue, 22 Dec 2020",Prob (F-statistic):,1.39e-09
Time:,18:17:53,Log-Likelihood:,17.0
No. Observations:,40,AIC:,-32.0
Df Residuals:,39,BIC:,-30.31
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Row VT Count,0.1161,0.015,7.874,0.000,0.086,0.146

0,1,2,3
Omnibus:,7.481,Durbin-Watson:,1.59
Prob(Omnibus):,0.024,Jarque-Bera (JB):,7.043
Skew:,0.655,Prob(JB):,0.0296
Kurtosis:,4.583,Cond. No.,1.0


# To Predict Linear Normalized Values with Topography Data

In [16]:
# NOTE(review): topography_df is not defined in the visible cells -
# presumably normalised_data with the topography dummy columns added;
# confirm it exists before running the notebook top to bottom.
predictors = ["Row VT Count", "Linear Topography", "Block Topography"]
y = topography_df["Linear Normalised Total"]
X = topography_df[predictors]

# Response first, design matrix second.  No constant column is added, so
# the model has no intercept.
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()
predictions = model.predict(X)  # in-sample fitted values

# Render the regression table as the cell output
model.summary()

0,1,2,3
Dep. Variable:,Linear Normalised Total,R-squared (uncentered):,0.67
Model:,OLS,Adj. R-squared (uncentered):,0.644
Method:,Least Squares,F-statistic:,25.08
Date:,"Tue, 22 Dec 2020",Prob (F-statistic):,4.98e-09
Time:,18:20:34,Log-Likelihood:,20.163
No. Observations:,40,AIC:,-34.33
Df Residuals:,37,BIC:,-29.26
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Row VT Count,0.1042,0.016,6.528,0.000,0.072,0.136
Linear Topography,0.0038,0.037,0.103,0.919,-0.071,0.079
Block Topography,0.1120,0.045,2.508,0.017,0.022,0.202

0,1,2,3
Omnibus:,17.525,Durbin-Watson:,2.221
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34.321
Skew:,1.04,Prob(JB):,3.53e-08
Kurtosis:,7.033,Cond. No.,3.34


In [17]:
# NOTE(review): topography_df is not defined in the visible cells -
# presumably normalised_data with the topography dummy columns added;
# confirm it exists before running the notebook top to bottom.
predictors = ["Row VT Count", "Block Topography"]
y = topography_df["Linear Normalised Total"]
X = topography_df[predictors]

# Linear normalised total regressed on the row VT count and the block
# dummy; no constant column is added, so there is no intercept.
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()
predictions = model.predict(X)  # in-sample fitted values

# Render the regression table as the cell output
model.summary()

0,1,2,3
Dep. Variable:,Linear Normalised Total,R-squared (uncentered):,0.67
Model:,OLS,Adj. R-squared (uncentered):,0.653
Method:,Least Squares,F-statistic:,38.62
Date:,"Tue, 22 Dec 2020",Prob (F-statistic):,7e-10
Time:,18:20:34,Log-Likelihood:,20.157
No. Observations:,40,AIC:,-36.31
Df Residuals:,38,BIC:,-32.94
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Row VT Count,0.1048,0.015,7.225,0.000,0.075,0.134
Block Topography,0.1114,0.044,2.549,0.015,0.023,0.200

0,1,2,3
Omnibus:,17.098,Durbin-Watson:,2.211
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33.65
Skew:,1.004,Prob(JB):,4.93e-08
Kurtosis:,7.02,Cond. No.,3.2


# To Predict Vertical Normalized Values with Topography Data

In [19]:
# NOTE(review): topography_df is not defined in the visible cells -
# presumably normalised_data with the topography dummy columns added;
# confirm it exists before running the notebook top to bottom.
predictors = ["Row VT Count", "Block Topography", "Linear Topography"]
y = topography_df["Vertical Normalised Total"]
X = topography_df[predictors]

# Vertical normalised total regressed on the row VT count plus the block
# and linear dummies; no constant column is added, so there is no intercept.
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()
predictions = model.predict(X)  # in-sample fitted values

# Render the regression table as the cell output
model.summary()

0,1,2,3
Dep. Variable:,Vertical Normalised Total,R-squared (uncentered):,0.651
Model:,OLS,Adj. R-squared (uncentered):,0.623
Method:,Least Squares,F-statistic:,23.03
Date:,"Tue, 22 Dec 2020",Prob (F-statistic):,1.39e-08
Time:,18:21:42,Log-Likelihood:,5.303
No. Observations:,40,AIC:,-4.606
Df Residuals:,37,BIC:,0.4606
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Row VT Count,0.1346,0.023,5.819,0.000,0.088,0.181
Block Topography,0.0995,0.065,1.537,0.133,-0.032,0.231
Linear Topography,0.1091,0.053,2.040,0.049,0.001,0.217

0,1,2,3
Omnibus:,7.092,Durbin-Watson:,2.696
Prob(Omnibus):,0.029,Jarque-Bera (JB):,5.967
Skew:,0.91,Prob(JB):,0.0506
Kurtosis:,3.515,Cond. No.,3.34


In [20]:
# NOTE(review): topography_df is not defined in the visible cells -
# presumably normalised_data with the topography dummy columns added;
# confirm it exists before running the notebook top to bottom.
predictors = ["Row VT Count", "Linear Topography"]
y = topography_df["Vertical Normalised Total"]
X = topography_df[predictors]

# Vertical normalised total regressed on the row VT count and the linear
# dummy; no constant column is added, so there is no intercept.
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()
predictions = model.predict(X)  # in-sample fitted values

# Render the regression table as the cell output
model.summary()

0,1,2,3
Dep. Variable:,Vertical Normalised Total,R-squared (uncentered):,0.629
Model:,OLS,Adj. R-squared (uncentered):,0.609
Method:,Least Squares,F-statistic:,32.21
Date:,"Tue, 22 Dec 2020",Prob (F-statistic):,6.58e-09
Time:,18:22:02,Log-Likelihood:,4.0645
No. Observations:,40,AIC:,-4.129
Df Residuals:,38,BIC:,-0.7512
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Row VT Count,0.1463,0.022,6.584,0.000,0.101,0.191
Linear Topography,0.0985,0.054,1.825,0.076,-0.011,0.208

0,1,2,3
Omnibus:,2.576,Durbin-Watson:,2.508
Prob(Omnibus):,0.276,Jarque-Bera (JB):,2.281
Skew:,0.572,Prob(JB):,0.32
Kurtosis:,2.753,Cond. No.,2.69


In [21]:
# NOTE(review): topography_df is not defined in the visible cells -
# presumably normalised_data with the topography dummy columns added;
# confirm it exists before running the notebook top to bottom.
predictors = ["Row VT Count"]
y = topography_df["Vertical Normalised Total"]
X = topography_df[predictors]

# Vertical normalised total regressed on the row VT count alone; no
# constant column is added, so there is no intercept.
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()
predictions = model.predict(X)  # in-sample fitted values

# Render the regression table as the cell output
model.summary()

0,1,2,3
Dep. Variable:,Vertical Normalised Total,R-squared (uncentered):,0.596
Model:,OLS,Adj. R-squared (uncentered):,0.586
Method:,Least Squares,F-statistic:,57.65
Date:,"Tue, 22 Dec 2020",Prob (F-statistic):,3.34e-09
Time:,18:22:13,Log-Likelihood:,2.3838
No. Observations:,40,AIC:,-2.768
Df Residuals:,39,BIC:,-1.079
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Row VT Count,0.1614,0.021,7.593,0.000,0.118,0.204

0,1,2,3
Omnibus:,1.24,Durbin-Watson:,2.112
Prob(Omnibus):,0.538,Jarque-Bera (JB):,0.921
Skew:,0.369,Prob(JB):,0.631
Kurtosis:,2.918,Cond. No.,1.0
