In [1]:
import pandas as pd
import numpy as np

# Load flights.csv from the working directory into the flights dataframe
flights = pd.read_csv('flights.csv')

# Double every element, first with plain Python and then with a dataframe column.
x = [1, 2, 3, 5, 7, 11, 13]
result_x = [2 * number for number in x]
print(result_x)

# Same doubling expressed as a derived column 'y' next to the source column 'x'.
df = pd.DataFrame({'x': x}).assign(y=lambda frame: frame['x'] * 2)

# Rows with dep_time strictly between 600 and 2000 and |arr_delay| below 20,
# selected with a single combined mask.
filtered_flights = flights.loc[
    flights['dep_time'].gt(600)
    & flights['dep_time'].lt(2000)
    & flights['arr_delay'].abs().lt(20)
]

# The same two conditions stored as named flag columns on flights ...
flights['daytime'] = flights['dep_time'].gt(600) & flights['dep_time'].lt(2000)
flights['approx_ontime'] = flights['arr_delay'].abs().lt(20)

# ... and then combined to select the rows.
filtered_flights_2 = flights.loc[flights['daytime'] & flights['approx_ontime']]

# Floating-point arithmetic is inexact: these print as values slightly off 1 and 2.
x_values = [1/49*49, np.sqrt(2)**2]
print(x_values)

# Check near equivalence
print(np.isclose(x_values, [1, 2]))

# Every comparison against NaN is False, including NaN == NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
print(np.nan > 5)
print(10 == np.nan)
print(np.nan == np.nan)

# Handling missing data using pandas functions
# Rows whose dep_time is missing.
filtered_na = flights.loc[flights['dep_time'].isna()]

# Rows with month == 1 and day == 1, ordered by dep_time (default NaN placement).
flights_sorted = flights.loc[
    flights['month'].eq(1) & flights['day'].eq(1)
].sort_values(by='dep_time')

# Same selection, but rows with missing dep_time are placed first.
flights_sorted_na_first = flights.loc[
    flights['month'].eq(1) & flights['day'].eq(1)
].sort_values(by=['dep_time'], na_position='first')

# Use the nullable "boolean" dtype together with pd.NA so that & and | follow
# three-valued (Kleene) logic: False & NA is False, True | NA is True, and every
# other combination with NA stays missing. A float NaN inside an object column
# is not treated as an "unknown" truth value by these operators.
df_bool = pd.DataFrame({'x': pd.array([True, False, pd.NA], dtype='boolean')})
df_bool['and'] = df_bool['x'] & pd.NA
df_bool['or'] = df_bool['x'] | pd.NA

# Rows whose month is 11 or 12, written as two explicit comparisons joined with |.
nov_dec_flights = flights[(flights['month'] == 11) | (flights['month'] == 12)]

# Boolean flag column: True where month equals 11.
flights['nov'] = flights['month'] == 11
# NOTE(review): this ORs the boolean 'nov' column with the integer 12; it does
# not test month == 12. Presumably kept to illustrate that pitfall -- confirm
# before relying on the 'final' column.
flights['final'] = flights['nov'] | 12

# Check if a value exists in a list
# Membership test for each integer 1..12 against the candidates 1, 5 and 11.
check_values = list(map(lambda number: number in (1, 5, 11), range(1, 13)))
print(check_values)

# Membership test for the first ten letters of the alphabet against the vowels.
letters = 'abcdefghijklmnopqrstuvwxyz'
vowels_check = [
    character in ("a", "e", "i", "o", "u")
    for character in letters[:10]
]
print(vowels_check)

# Rows whose month is 11 or 12, selected with a single isin membership test.
flights_nov_dec = flights.loc[flights['month'].isin([11, 12])]

# Grouping and aggregation
# Per (year, month, day):
#   all_delayed    -- True when every non-missing dep_delay is <= 60. Missing
#                     values are dropped first; masking them to False would make
#                     any group containing a single NaN report False.
#   any_long_delay -- True when at least one arr_delay is >= 300. NaN compares
#                     False, so missing values never count as a long delay.
grouped_flights = flights.groupby(['year', 'month', 'day']).agg(
    all_delayed=pd.NamedAgg(column='dep_delay', aggfunc=lambda x: x.dropna().le(60).all()),
    any_long_delay=pd.NamedAgg(column='arr_delay', aggfunc=lambda x: x.ge(300).any())
).reset_index()

# Numeric summaries per (year, month, day); the column names are kept as-is:
#   all_delayed    -- proportion of non-missing dep_delay values that are <= 60.
#                     Missing values are dropped first so they do not inflate the
#                     denominator as if they were False.
#   any_long_delay -- count of arr_delay values >= 300 (NaN compares False and
#                     therefore is not counted).
grouped_flights_2 = flights.groupby(['year', 'month', 'day']).agg(
    all_delayed=pd.NamedAgg(column='dep_delay', aggfunc=lambda x: x.dropna().le(60).mean()),
    any_long_delay=pd.NamedAgg(column='arr_delay', aggfunc=lambda x: x.ge(300).sum())
).reset_index()

# Keep only rows with arr_delay > 0, then per (year, month, day) report the mean
# arr_delay ('behind') and the number of rows in the group ('n').
grouped_flights_3 = (
    flights.loc[flights['arr_delay'].gt(0)]
    .groupby(['year', 'month', 'day'])
    .agg(
        behind=('arr_delay', 'mean'),
        n=('arr_delay', 'size'),
    )
    .reset_index()
)

# No pre-filtering here: the subsetting happens inside each aggregation, so 'n'
# counts every row of the group while 'behind' / 'ahead' average only the
# positive / negative arr_delay values respectively.
grouped_flights_4 = (
    flights.groupby(['year', 'month', 'day'])
    .agg(
        behind=('arr_delay', lambda delays: delays[delays > 0].mean()),
        ahead=('arr_delay', lambda delays: delays[delays < 0].mean()),
        n=('arr_delay', 'size'),
    )
    .reset_index()
)

# Sample values including a missing entry. np.nan is used because the np.NaN
# alias was removed in NumPy 2.0.
x_values_2 = [-3, -2, -1, 0, 1, 2, 3, np.nan]

# Positive numbers are kept unchanged; everything else becomes '-ve'. NaN > 0 is
# False, so the missing entry also ends up as '-ve'.
positive = [value if value > 0 else '-ve' for value in x_values_2]
print(positive)

# Full sign classification; the last branch catches NaN, for which all three
# comparisons are False.
result_values = [
    "+ve" if value > 0
    else "-ve" if value < 0
    else "0" if value == 0
    else "???"
    for value in x_values_2
]
print(result_values)


[2, 4, 6, 10, 14, 22, 26]
[0.9999999999999999, 2.0000000000000004]
[ True  True]
False
False
False
[True, False, False, False, True, False, False, False, False, False, True, False]
[True, False, False, False, True, False, False, False, True, False]
['-ve', '-ve', '-ve', '-ve', 1, 2, 3, '-ve']
['-ve', '-ve', '-ve', '0', '+ve', '+ve', '+ve', '???']


In [3]:
import pandas as pd
import numpy as np

# For x <- c(-3:3, NA)
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
x = [-3, -2, -1, 0, 1, 2, 3, np.nan]

# if_else(x > 0, "+ve", "-ve")
# NaN > 0 is False, so the missing entry falls into the "-ve" branch here.
result_1 = ["+ve" if i > 0 else "-ve" for i in x]

# Handling the three-case if_else
# NaN fails both comparisons and therefore reaches the "???" fallback.
result_2 = ["+ve" if i > 0 else "-ve" if i <= 0 else "???" for i in x]

# if_else(x < 0, -x, x)
result_3 = [-i if i < 0 else i for i in x]

x1 = [np.nan, 1, 2, np.nan]
y1 = [3, np.nan, 4, 6]

# if_else(is.na(x1), y1, x1)
# Take the value from y1 wherever x1 is missing. The loop variables are named
# a/b so they do not shadow the list x defined above.
result_4 = [b if pd.isna(a) else a for a, b in zip(x1, y1)]

# Nested if_else
# The positive case is tested explicitly so that NaN (for which every comparison
# is False) is labelled "???" instead of silently falling through to "+ve".
result_5 = [
    "0" if i == 0
    else "-ve" if i < 0
    else "+ve" if i > 0
    else "???"
    for i in x
]

# case_when equivalent
def case_when(x):
    """Classify a scalar as missing, zero, negative or positive.

    The rules are tried top to bottom and the first one that holds decides the
    label: missing -> "???", zero -> "0", negative -> "-ve", positive -> "+ve".
    Returns None when no rule holds.
    """
    rules = (
        (pd.isna, "???"),
        (lambda value: value == 0, "0"),
        (lambda value: value < 0, "-ve"),
        (lambda value: value > 0, "+ve"),
    )
    # The generator is lazy, so later rules are not evaluated once one matches.
    return next((label for holds, label in rules if holds(x)), None)

# Apply case_when to every element of x, keeping the original order.
result_6 = list(map(case_when, x))

# Simplified case_when equivalent with default
def case_when_with_default(x):
    """Return "-ve" for negative x, "+ve" for positive x, and "???" otherwise.

    The "???" default covers every input for which both comparisons are False,
    which includes zero as well as NaN.
    """
    return "-ve" if x < 0 else "+ve" if x > 0 else "???"

# Apply case_when_with_default to every element of x, keeping the original order.
result_7 = list(map(case_when_with_default, x))

# Another case_when
def another_case_when(x):
    """Return "+ve" when x > 0, otherwise None.

    The rules are checked in order and the first match wins. Every x that is
    greater than 2 is also greater than 0, so the "big" rule never gets a chance
    to fire; it is kept to show that rule order matters.
    """
    if x > 0:
        return "+ve"
    if x > 2:
        return "big"
    # No rule matched (zero, negative or NaN input).
    return None

# Apply another_case_when to every element of x, keeping the original order.
result_8 = list(map(another_case_when, x))

# Load flights.csv from the working directory into the flights dataframe
flights = pd.read_csv('flights.csv')

def flight_status(row):
    """Translate a row's 'arr_delay' value into a status label.

    The checks run in this order, first match wins:
      missing            -> "cancelled"
      below -30          -> "very early"
      below -15          -> "early"
      absolute value <= 15 -> "on time"
      below 60           -> "late"
      anything else      -> "very late"

    `row` only needs to support row['arr_delay'] lookup.
    """
    delay = row['arr_delay']
    if pd.isna(delay):
        return "cancelled"
    if delay < -30:
        return "very early"
    if delay < -15:
        return "early"
    if abs(delay) <= 15:
        return "on time"
    return "late" if delay < 60 else "very late"

# Label every row by passing it to flight_status and store the result in a new
# 'status' column on flights.
flights['status'] = flights.apply(flight_status, axis='columns')

# Bare trailing expression so the notebook renders the dataframe.
flights


Unnamed: 0,year,month,day,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,carrier,flight,tailnum,origin,dest,air_time,distance,hour,minute,time_hour,status
0,2013,1,1,517.0,515,2.0,830.0,819,11.0,UA,1545,N14228,EWR,IAH,227.0,1400,5,15,2013-01-01 05:00:00,on time
1,2013,1,1,533.0,529,4.0,850.0,830,20.0,UA,1714,N24211,LGA,IAH,227.0,1416,5,29,2013-01-01 05:00:00,late
2,2013,1,1,542.0,540,2.0,923.0,850,33.0,AA,1141,N619AA,JFK,MIA,160.0,1089,5,40,2013-01-01 05:00:00,late
3,2013,1,1,544.0,545,-1.0,1004.0,1022,-18.0,B6,725,N804JB,JFK,BQN,183.0,1576,5,45,2013-01-01 05:00:00,early
4,2013,1,1,554.0,600,-6.0,812.0,837,-25.0,DL,461,N668DN,LGA,ATL,116.0,762,6,0,2013-01-01 06:00:00,early
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
336771,2013,9,30,,1455,,,1634,,9E,3393,,JFK,DCA,,213,14,55,2013-09-30 14:00:00,cancelled
336772,2013,9,30,,2200,,,2312,,9E,3525,,LGA,SYR,,198,22,0,2013-09-30 22:00:00,cancelled
336773,2013,9,30,,1210,,,1330,,MQ,3461,N535MQ,LGA,BNA,,764,12,10,2013-09-30 12:00:00,cancelled
336774,2013,9,30,,1159,,,1344,,MQ,3572,N511MQ,LGA,CLE,,419,11,59,2013-09-30 11:00:00,cancelled
