In [1]:
import pandas as pd
from scipy import stats
from matplotlib import pyplot as plt

In [2]:
def get_mean_std_and_range(df: pd.DataFrame):
    """
    Pull four summary values out of an object that is indexed by statistic name.

    @param df: an object supporting label lookup for 'mean', 'std', 'max', and 'min' (for example, one column 
    of the table produced by DataFrame.describe())
    @return: a tuple of (mean, standard deviation, maximum, minimum), in that order
    """
    wanted_labels = ('mean', 'std', 'max', 'min')
    # Lookups happen in the listed order, so a missing label fails on the first absent one.
    mean, std, maximum, minimum = (df[label] for label in wanted_labels)
    return mean, std, maximum, minimum

In [6]:
def calc_summary_statistics(df: pd.DataFrame):
    """
    Summarise the 'x' and 'y' columns of a DataFrame.

    @param df: a DataFrame with an 'x' column and a 'y' column
    @return: a dict holding the mean, standard deviation, maximum, and minimum of x and of y (keys 'x_mean', 
    'x_std', 'x_max', 'x_min' and the matching 'y_' keys), plus the r-value of a linear regression of y on x 
    under the key 'correlation coefficient'
    """
    described = df.describe()
    summary = {}
    # Process x first, then y, so the keys are inserted in the order x_*, y_*.
    for column in ('x', 'y'):
        mean, std, maximum, minimum = get_mean_std_and_range(described[column])
        summary[column + "_mean"] = mean
        summary[column + "_std"] = std
        summary[column + "_max"] = maximum
        summary[column + "_min"] = minimum
    # The regression result exposes the correlation coefficient as `rvalue`.
    regression = stats.linregress(df['x'], df['y'])
    summary["correlation coefficient"] = regression.rvalue
    return summary
    

In [7]:
def read_csv_and_calc_statistics(file_name):
    """
    Load a CSV file and summarise its x and y values.

    @param file_name: name of the CSV file which contains the x and y values; handed to pandas.read_csv
    @return: the result of calc_summary_statistics for the loaded data, i.e. the mean, standard deviation, 
    maximum, and minimum for the x and y values, and the correlation coefficient for the x and y values
    """
    return calc_summary_statistics(pd.read_csv(file_name))

In [8]:
# Build a concrete list rather than a lazy map object: a map iterator is exhausted after a single pass,
# so re-running a later cell that consumes it (without re-running this one) would silently see no names.
all_names = ["data-{}.csv".format(val) for val in range(1, 7)]

In [9]:
# Summarise every file in all_names; the resulting list is this cell's displayed value.
[read_csv_and_calc_statistics(name) for name in all_names]

[{'x_mean': 8.71769618641382,
  'x_std': 12.15551723122202,
  'x_max': 35.23705548785129,
  'x_min': -22.635877790008944,
  'y_mean': 12.522874034702584,
  'y_std': 6.63996400756785,
  'y_max': 27.38415261152477,
  'y_min': -2.565087832693213,
  'correlation coefficient': 0.9503256954375483},
 {'x_mean': -0.7107871267159573,
  'x_std': 11.481583587790734,
  'x_max': 27.6470003722073,
  'x_min': -33.2711130771198,
  'y_mean': -0.7489869764497991,
  'y_std': 6.257917998755848,
  'y_max': 14.14731499231748,
  'y_min': -18.621803760419063,
  'correlation coefficient': 0.9551531665750373},
 {'x_mean': 0.9689074904200939,
  'x_std': 2.235557861057969,
  'x_max': 4.51888491755363,
  'x_min': -2.4490643296742807,
  'y_mean': 8.508535236641327,
  'y_std': 1.1493008110024525,
  'y_max': 10.551871022804317,
  'y_min': 6.641734490391763,
  'correlation coefficient': 0.9819372646760659},
 {'x_mean': 49.90508614830509,
  'x_std': 28.50861051988193,
  'x_max': 87.3767,
  'x_min': 0.0,
  'y_mean': 18.