# In [8]:
import numpy as np
import scipy
import scipy.stats
import pandas

def mann_whitney_plus_means(filename):
    """
    Compare hourly turnstile entries on rainy versus non-rainy records.

    Reads the CSV at ``filename`` with pandas, splits its ``ENTRIESn_hourly``
    column by the ``rain`` column (1 = rain, 0 = no rain) and runs scipy's
    Mann-Whitney U-test on the two resulting samples.

    The test is called with scipy's default arguments, so the alternative
    hypothesis and the convention used for the reported U-statistic are
    whatever the installed scipy version defaults to.

    Returns a 4-tuple:
        1) the mean of ENTRIESn_hourly where rain == 1
        2) the mean of ENTRIESn_hourly where rain == 0
        3) the Mann-Whitney U-statistic (rainy sample passed first)
        4) the p-value of the test
    """
    data = pandas.read_csv(filename)
    entries = data['ENTRIESn_hourly']
    rain_flag = data['rain']

    # Rows whose rain value is neither 1 nor 0 end up in neither sample.
    wet_entries = entries[rain_flag == 1]
    dry_entries = entries[rain_flag == 0]

    wet_mean = np.mean(wet_entries)
    dry_mean = np.mean(dry_entries)

    test_result = scipy.stats.mannwhitneyu(wet_entries, dry_entries)
    u_statistic, p_value = test_result

    return wet_mean, dry_mean, u_statistic, p_value

# Run the analysis on the master turnstile/weather CSV (path is relative to
# the current working directory). As a bare expression, the returned tuple is
# only displayed when run in an interactive session or notebook cell.
mann_whitney_plus_means('turnstile_data_master_with_weather.csv')

# Recorded notebook output (rain mean, no-rain mean, U, p-value):
# (1105.4463767458733, 1090.278780151855, 1924409167.0, 0.024940392294493356)