In [56]:
import pandas as pd
from scipy import stats
from matplotlib import pyplot as plt

In [19]:
def get_mean_std_and_range(df: pd.DataFrame):
    """
    Pull the mean, the standard deviation and the value range out of a set of summary statistics.

    @param df: summary statistics that can be indexed by the labels 'mean', 'std', 'min' and 'max'
    (in this file, one column of the table produced by DataFrame.describe() is passed in)
    @return: a tuple (mean, standard deviation, range), where range is the 'max' entry minus the 'min' entry
    """
    mean = df['mean']
    std = df['std']
    # The range is not stored directly; it is derived from the two extremes.
    value_range = df['max'] - df['min']
    return mean, std, value_range

In [24]:
def calc_summary_statistics(df: pd.DataFrame):
    """
    Summarise a two-column dataset of x and y values.

    @param df: a DataFrame with a column named 'x' and a column named 'y'
    @return: a dict with the keys "x_mean", "x_std", "x_range", "y_mean", "y_std", "y_range" and
    "correlation coefficient"; the last one is the rvalue reported by scipy's linear regression of y on x
    """
    described = df.describe()
    summary = {}
    # Same three statistics for each column, stored under column-specific keys
    # (x entries first, then y entries).
    for column, (mean_key, std_key, range_key) in (
        ('x', ("x_mean", "x_std", "x_range")),
        ('y', ("y_mean", "y_std", "y_range")),
    ):
        summary[mean_key], summary[std_key], summary[range_key] = get_mean_std_and_range(described[column])
    # Unlike the values above, the correlation is computed from the raw data,
    # not from the describe() table.
    summary["correlation coefficient"] = stats.linregress(df['x'], df['y']).rvalue
    return summary
    

In [27]:
def read_csv_and_calc_statistics(file_name):
    """
    Load a CSV file and compute its summary statistics.

    @param file_name: name of the CSV file which contains the x and y values
    @return: the result of calc_summary_statistics for the loaded data, i.e. the mean, standard deviation,
    and range for the x and y values, and the correlation coefficient for the x and y values
    """
    return calc_summary_statistics(pd.read_csv(file_name))

In [48]:
# Built as a list rather than a lazy `map` iterator: an iterator is exhausted after
# one pass, so re-running any cell that consumes it would silently see no file names.
all_names = ["data-{}.csv".format(val) for val in range(1, 7)]

In [49]:
# Summarise every data file; the resulting list of dicts is this cell's displayed output.
[read_csv_and_calc_statistics(name) for name in all_names]

[{'x_mean': 8.71769618641382,
  'x_std': 12.15551723122202,
  'x_range': 57.87293327786024,
  'y_mean': 12.522874034702584,
  'y_std': 6.63996400756785,
  'y_range': 29.94924044421798,
  'correlation coefficient': 0.9503256954375483},
 {'x_mean': -0.7107871267159573,
  'x_std': 11.481583587790734,
  'x_range': 60.9181134493271,
  'y_mean': -0.7489869764497991,
  'y_std': 6.257917998755848,
  'y_range': 32.769118752736546,
  'correlation coefficient': 0.9551531665750373},
 {'x_mean': 0.9689074904200939,
  'x_std': 2.235557861057969,
  'x_range': 6.967949247227911,
  'y_mean': 8.508535236641327,
  'y_std': 1.1493008110024525,
  'y_range': 3.9101365324125537,
  'correlation coefficient': 0.9819372646760659},
 {'x_mean': 49.90508614830509,
  'x_std': 28.50861051988193,
  'x_range': 87.3767,
  'y_mean': 18.553836525423726,
  'y_std': 16.545896442941682,
  'y_range': 44.56504,
  'correlation coefficient': -0.08119306735490815},
 {'x_mean': 49.90999357843095,
  'x_std': 28.500000943801354,
  