In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('taxi_nyc.csv')

In [3]:
first_index = df.index[0]
print(first_index)

0


In [4]:
print(df.head(10)) 

             pickup_dt pickup_month        borough  pickups hday  spd   vsb  \
0  2015-01-01 01:00:00          Jan          Bronx      152    Y  5.0  10.0   
1  2015-01-01 01:00:00          Jan       Brooklyn     1519    Y  5.0  10.0   
2  2015-01-01 01:00:00          Jan            EWR        0    Y  5.0  10.0   
3  2015-01-01 01:00:00          Jan      Manhattan     5258    Y  5.0  10.0   
4  2015-01-01 01:00:00          Jan         Queens      405    Y  5.0  10.0   
5  2015-01-01 01:00:00          Jan  Staten Island        6    Y  5.0  10.0   
6  2015-01-01 01:00:00          Jan            NaN        4    Y  5.0  10.0   
7  2015-01-01 02:00:00          Jan          Bronx      120    Y  3.0  10.0   
8  2015-01-01 02:00:00          Jan       Brooklyn     1229    Y  3.0  10.0   
9  2015-01-01 02:00:00          Jan            EWR        0    Y  3.0  10.0   

   temp  dewp     slp  pcp 01  pcp 06  pcp 24   sd  
0  30.0   7.0  1023.5     0.0     0.0     0.0  0.0  
1  30.0   7.0  1023.5   

In [5]:
rows, columns = df.shape  
print(f"{rows} строк, {columns} столбцов")  

29101 строк, 14 столбцов


In [6]:
data_types = df.dtypes   
dominant_type = data_types.mode()
print(dominant_type)

0    float64
dtype: object


In [7]:
df.rename(columns={  
    'pcp 01': 'pcp_01',  
    'pcp 06': 'pcp_06',  
    'pcp 24': 'pcp_24'  
}, inplace=True)  

print(df.columns) 

Index(['pickup_dt', 'pickup_month', 'borough', 'pickups', 'hday', 'spd', 'vsb',
       'temp', 'dewp', 'slp', 'pcp_01', 'pcp_06', 'pcp_24', 'sd'],
      dtype='object')


In [8]:
borough_counts = df['borough'].value_counts()
manhattan_count = borough_counts.get('Manhattan', 0)  
print(f'Район Манхэттен встречается: {manhattan_count} раз(а).')  

Район Манхэттен встречается: 4343 раз(а).


In [9]:
total_pickups = df['pickups'].sum()
print(total_pickups)

14265773


In [10]:
grouped_data = df.groupby('borough')['pickups'].sum()  
max_borough = grouped_data.idxmax()  
max_pickups = grouped_data.max()  
print(f'Район с наибольшим количеством поездок: {max_borough}')

Район с наибольшим количеством поездок: Manhattan


In [11]:
grouped_data = df.groupby('borough')['pickups'].sum()
min_borough_index = grouped_data.idxmin()  
min_pickups = grouped_data.min() 
print(f'Район с наименьшим количеством поездок: {min_borough_index}')

Район с наименьшим количеством поездок: EWR


In [12]:
 df['borough'].unique()


array(['Bronx', 'Brooklyn', 'EWR', 'Manhattan', 'Queens', 'Staten Island',
       nan], dtype=object)

In [13]:
grouped_data = df.groupby('borough')['pickups'].sum()  
min_borough_index = grouped_data.idxmin()  
min_pickups = grouped_data.min() 
min_borough_index

'EWR'

In [14]:

# Преобразование столбца pickup_dt в формат datetime  
df['pickup_dt'] = pd.to_datetime(df['pickup_dt'])  

# Добавление столбца с днем недели  
df['day_of_week'] = df['pickup_dt'].dt.dayofweek  # 0 - понедельник, 6 - воскресенье  

# Условие для выходных (5 - суббота, 6 - воскресенье) и обычных дней (например, 0 - понедельник)  
weekend = df[df['day_of_week'].isin([5, 6])]  
weekdays = df[df['day_of_week'].isin([0, 1, 2, 3, 4])]  # Понедельник до Пятницы  

# Группировка данных по району и подсчет заказов  
weekend_orders = weekend.groupby('borough')['pickups'].sum()  
weekday_orders = weekdays.groupby('borough')['pickups'].sum()  

# Подсчет среднего числа заказов  
avg_weekend_orders = weekend_orders / weekend['borough'].nunique()  # Среднее для выходных  
avg_weekday_orders = weekday_orders / weekdays['borough'].nunique()  # Среднее для обычных дней  

# Сравнение и выбор районов с большим количеством заказов в выходные  
higher_orders = avg_weekend_orders[avg_weekend_orders > avg_weekday_orders]  

print("Районы с большим количеством заказов в выходные дни:")  
print(higher_orders)  

Районы с большим количеством заказов в выходные дни:
Series([], Name: pickups, dtype: float64)


In [15]:
print(df.columns)  

Index(['pickup_dt', 'pickup_month', 'borough', 'pickups', 'hday', 'spd', 'vsb',
       'temp', 'dewp', 'slp', 'pcp_01', 'pcp_06', 'pcp_24', 'sd',
       'day_of_week'],
      dtype='object')


In [16]:
df.shape

(29101, 15)

In [17]:
df.columns

Index(['pickup_dt', 'pickup_month', 'borough', 'pickups', 'hday', 'spd', 'vsb',
       'temp', 'dewp', 'slp', 'pcp_01', 'pcp_06', 'pcp_24', 'sd',
       'day_of_week'],
      dtype='object')

In [18]:
df[['pickup_month', 'pickups']].head()

Unnamed: 0,pickup_month,pickups
0,Jan,152
1,Jan,1519
2,Jan,0
3,Jan,5258
4,Jan,405


In [19]:
pickups_by_mndt = df \
    .groupby(['pickup_month', 'pickup_dt'], as_index=False) \
    .aggregate({'pickups': 'sum'}) \
    .sort_values('pickups', ascending=False)

In [20]:
pickups_by_mndt.to_csv('pickups_by_mndt.csv', index=False)

In [21]:
df.columns

Index(['pickup_dt', 'pickup_month', 'borough', 'pickups', 'hday', 'spd', 'vsb',
       'temp', 'dewp', 'slp', 'pcp_01', 'pcp_06', 'pcp_24', 'sd',
       'day_of_week'],
      dtype='object')

In [22]:
holiday_df = df[df['hday'] == 'Y']    
mean_pickups_by_borough = holiday_df.groupby('borough')['pickups'].mean()  
print("Среднее количество заказов в праздничные дни по районам:")  
print(mean_pickups_by_borough)  


Среднее количество заказов в праздничные дни по районам:
borough
Bronx              48.065868
Brooklyn          527.011976
EWR                 0.041916
Manhattan        2035.928144
Queens            320.730539
Staten Island       1.497006
Name: pickups, dtype: float64


In [24]:
result = df.groupby('borough')['pickups'].sum().reset_index()

In [25]:
result

Unnamed: 0,borough,pickups
0,Bronx,220047
1,Brooklyn,2321035
2,EWR,105
3,Manhattan,10367841
4,Queens,1343528
5,Staten Island,6957


In [27]:
pickups_all = df.groupby(['borough', 'pickup_month'])['pickups'].sum().reset_index()  
pickups_by_mon_bor = pickups_all.sort_values(by='pickups', ascending=False)  
print(pickups_by_mon_bor)  
print(pickups_by_mon_bor.shape) 

          borough pickup_month  pickups
21      Manhattan          Jun  1995388
23      Manhattan          May  1888800
19      Manhattan          Feb  1718571
22      Manhattan          Mar  1661261
18      Manhattan          Apr  1648278
20      Manhattan          Jan  1455543
9        Brooklyn          Jun   482466
11       Brooklyn          May   476087
6        Brooklyn          Apr   378095
10       Brooklyn          Mar   346726
7        Brooklyn          Feb   328650
8        Brooklyn          Jan   309011
27         Queens          Jun   286311
29         Queens          May   275893
28         Queens          Mar   219561
24         Queens          Apr   216857
25         Queens          Feb   185695
26         Queens          Jan   159211
5           Bronx          May    53037
3           Bronx          Jun    49006
0           Bronx          Apr    34617
4           Bronx          Mar    32232
1           Bronx          Feb    28694
2           Bronx          Jan    22461
