In [1]:
import pandas as pd 
import os 

In [2]:
country_dict = {
    "USA": "2016_american",
    "Germany": "2016_german",
    "France": "2016_french",
    "GB": "2016_british",
    "Russia": "2016_russian",
} 

def print_latex(df, col_format="|c|c|c|c|c|"):
    """ Prints the latex syntax equivalent to the passed dataframe
    :param df: Pandas dataframe 
    :col_format : String indicating the format of columns
    """
    df = df.round(2)
    latex = df.to_latex(column_format=col_format, index=False).replace('toprule',
                                            "hline").replace('midrule',
                                            "hline").replace('bottomrule',
                                            "hline").replace("\\\\\n",
                                            "\\\\\n\\hline").replace("\hline\hline","\hline")
    print(latex)
    
def country_correlations(country_dict, path, features, target):
    """ Returns dataframe with correlation coeficients 
    :param country_dict: country_name > file_name 
    :param path : Where country files are stored
    :param features : list of column names 
    :param target: target column name
    :return data_frame: Dataframe with correlatiosn for each country in respect to the target
    """
    lst = []
    for k,v in country_dict.items():
        df = pd.read_pickle(os.path.join(path,v))
        correlation_df = df[features].corr().head(len(features)-1)[target]
        correlation_df.name = k
        lst.append(correlation_df)
    df = pd.concat(lst,axis=1)
    return df

In [3]:
path = "../data/final_sets/countries/model_large"
features = ["efficiency", "eig_central", "in_degree", "k_core", "out_degree", "views"]

In [8]:
def country_correlations(country_dict, path, features, target):
    lst = []
    print(features[:-1])
    lst.append(pd.DataFrame(features[:-1], columns=["views"],index=features[:-1]).head(len(features)-1))
    for k,v in country_dict.items():
        df = pd.read_pickle(os.path.join(path,v))
        correlation_df = df[features].corr().head(len(features)-1)[target]
        correlation_df.name = k
        lst.append(correlation_df)
    df = pd.concat(lst,axis=1)
    return df

In [9]:
country_correlations(country_dict, path, features, "views")

['efficiency', 'eig_central', 'in_degree', 'k_core', 'out_degree']


Unnamed: 0,views,USA,Germany,France,GB,Russia
efficiency,efficiency,0.023479,0.035232,0.037495,0.025201,0.070529
eig_central,eig_central,0.471653,0.785431,0.286714,0.580289,0.864336
in_degree,in_degree,0.43966,0.903796,0.50157,0.589932,0.961047
k_core,k_core,0.280151,0.35519,0.270164,0.427128,0.652708
out_degree,out_degree,0.259269,0.325702,0.266777,0.364478,0.578643


In [10]:
print_latex(country_correlations(country_dict, path, features, "views"))

['efficiency', 'eig_central', 'in_degree', 'k_core', 'out_degree']
\begin{tabular}{|c|c|c|c|c|}
\hline
       views &   USA &  Germany &  France &    GB &  Russia \\
\hline
  efficiency &  0.02 &     0.04 &    0.04 &  0.03 &    0.07 \\
\hline eig\_central &  0.47 &     0.79 &    0.29 &  0.58 &    0.86 \\
\hline   in\_degree &  0.44 &     0.90 &    0.50 &  0.59 &    0.96 \\
\hline      k\_core &  0.28 &     0.36 &    0.27 &  0.43 &    0.65 \\
\hline  out\_degree &  0.26 &     0.33 &    0.27 &  0.36 &    0.58 \\
\hline
\end{tabular}



In [None]:
[1,2,3][:-1]