In [0]:
import seaborn as sns
import matplotlib.pyplot as plt



%matplotlib inline

sns.set_context('notebook')

%config InlineBackend.figure_format = 'retina'



In [0]:
def print_welcome_message():
    print("Welcome to The DataFrame Statistician!")

In [0]:
def validate_main_option_choice():
    
    #validates the users main option choice as a number from 1 - 4 
   
    # display list of choices
    print("Please choose from the following options:")
    print("    1 – Load data from a file")
    print("    2 – Analyse")
    print("    3 – Visualise")
    print("    4 – Quit")
   
    # Prompts the user for input
    while True:
        try:  # checks the input is a number
            user_choice = int(input("Please enter a number between 1 and 4: "))
        except ValueError:
            print("Try again! Make sure you enter a number.")
            continue
        if user_choice not in range(1, 5):  # checks the number is within the valid range
            print("Try again! Make sure the number you enter is between 1 and 4")
            continue
        else:
            return(user_choice)


In [0]:
def load_csv():
    
    from pandas import read_csv
    
    filepath = input("Please enter the file path?: ")
    
    # decide header row for the dataframe
    try:
        header = int(input("which row is the header (enter a number. 1 for the first row)? Insert 'N' if there is no row of headers: "))
        header -= 1 # correct index for implementation of header
    except ValueError:
        print("No valid row has been entered. No header will be applied")
        header = -1 # set index for no header
                
    # load data
    if header >= 0:
        try:
            myDataFrame = pd.read_csv(filepath, header = header, sep = ",")
            print("File loaded!")
            return(myDataFrame)
        except FileNotFoundError:
            print("Please enter a valid file path for a 'csv' document. Don't forget to put '.csv' on the end of the file name")
    else:
        try:
            myDataFrame = read_csv(filepath, header = None, sep = ",")
            print("File loaded!")
            return(myDataFrame)
        except FileNotFoundError:
            print("Please enter a valid file path for a 'csv' document. Don't forget to put '.csv' on the end of the file name")
     
    
        
    
    

In [0]:
def create_stats_report(data):
  
    
    # get only numeric data
    datanumeric = data._get_numeric_data()
    
    # store column names of data
    colnames = list(datanumeric.columns.values)
    
    # list column names with ordered index fr user
    i = 0
    for i in colnames:
        print(i+1,":", colnames[i])
        i += 1
    
  # Prompt user for the column they wish to analyse
    while True:
        try:
            col_choice = (int(input("Please select a column of data to analayse: ")))
        except ValueError:
            print("Please enter a valid number.")
            continue
        if col_choice < 0  or col_choice > len(colnames):
            print("Please enter a number corresponding to a column of data to analyse.")
            continue
        else:
            print("OK. Got it!")
            break
         
    col_name = colnames[col_choice-1]
    
    data_col = data[col_name]
    
    # Calculate variables
    count = len(data_col)
    average = round(data_col, 2)
    standdev = round(data_col, 2)

    from scipy import stats

    stderrMean = round(stats.sem(data_col))


    # Print report
    print("----------")
    print("Statistics for", colname)
    print("")
    print("Number of values (n): ", count)
    print("")
    print("Mean: ", average)
    print("")
    print("Standard Deviation: ", standdev)
    print("")
    print("Standard Error of the Mean: ", stderrMean)
    print("")
    print("--------------------")
    
    

In [0]:
def choose_graph():
    
    print("1: Line Graph")
    print("2: Bar Graph")
    print("3: Boxplot Graph")
    
    #initialize set of choices
    graphs = [1,2,3]
    
    # Prompt user for the graph type they would like
    while True:
        try:
            graph_choice = int(input("Which type of graph would you like to create? Enter the corresponding number!"))
        except ValueError:
            print("please enter a number")
            continue
        if graph_choice not in graphs:
            print("Please enter a number corresponding to a column of data to analyse.")
            continue
        else:
            print("OK. Got it!")
            break
            
     
    print("1: All columns on one plot")
    print("2: All columns on different plots")
    plot_possibilities = [1, 2]
    
    # ask for subplot choice
    while True:
        try:
            plot_choice = int(input("Would you like all columns on one plot or subplots? Enter the corresponding number"))
        except ValueError:
            print("please enter a number")
            continue
        if plot_choice not in plot_possibilities:
            print("Please enter either '1' for one plot or '2' for subplots")
            continue
        else:
            print("OK. Got it!")
            break
    
    return(graph_choice, plot_choice)

In [0]:
def plot_all(datanumeric, graph):
    
    title = input("What would you like the graph to be called? ")
    xaxis = input("What would you like the x-axis to be called? ")
    yaxis = input("What would you like the y-axis to be called? ")
    
    # choose plot
    print("Got it! Doing it for you now")
    if graph == 1:
        datanumeric.plot.line()
    elif graph == 2:
        datanumeric.plot.bar()
    elif graph == 3:
        datanumeric.boxplot()
        
    plt.show() 
    
    # name/customise plot
    plt.title(title)
    plt.xlabel(xaxis)
    plt.ylabel(yaxis)
    

In [0]:
def subplot_data(datanumeric, graph):
  
    title = input("What would you like the main title to be called? ")
       
    # choose plot
    if graph == 1:
        datanumeric.plot.line(subplots = True, title = title, grid=False, legend = False)
        
    if graph == 2:
        datanumeric.plot.bar(subplots = True, title = title, grid=False, legend = False)
        
    if graph == 3:
        datanumeric.plot.box(subplots = True, title = title, grid=False, legend = False)
        
    plt.show() 
    plt.subplots_adjust(hspace = 1, wspace = 1)

    

In [0]:
def plot_data(data, graph, plot):

    # get only numeric data
    datanumeric = data._get_numeric_data()
    
    # call the chosen plot function
    if plot == 1:
        plot_all(datanumeric, graph)
    elif plot == 2:
        subplot_data(datanumeric, graph)
    


In [0]:
def main():
# Displays the welcome menu and option list and calls funciton to carry out chosen task.

  print_welcome_message()

  # gets main option choice from user
  user_choice = validate_main_option_choice()
  
  #initialise emptry dataframe
  data = pd.DataFrame()
    
    
  # loop through main menu options.
  while user_choice != 4:
    if user_choice == 1:
      data_loaded = load_csv()
      data = data_loaded
    elif user_choice == 2:
          create_stats_report(data)
    elif user_choice == 3:
      graph, plot = choose_graph()
      plot_data(data, graph, plot)
    else: 
      print("Please load the data before choosing options 2 or 3")
      user_choice = validate_main_option_choice()
            
        
  print("You have quite the program")
          

In [13]:
main()

Welcome to The DataFrame Statistician!
Please choose from the following options:
    1 – Load data from a file
    2 – Analyse
    3 – Visualise
    4 – Quit
Please enter a number between 1 and 4: 2


NameError: ignored

https://stackoverflow.com/questions/25239933/how-to-add-title-to-subplots-in-matplotlib