In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

# Read the flights file, handling mixed-type values in the airport columns
file_path = 'flights_cleaned.csv'  # make sure the file name is correct
data = pd.read_csv(
    file_path,
    # Both airport-code columns are read as str.
    dtype=dict.fromkeys(('ORIGIN_AIRPORT', 'DESTINATION_AIRPORT'), str),
    low_memory=False,
)

# Build a directed graph from the flight records
def create_directed_graph(data):
    """Build a directed graph with an edge for every origin -> destination pair.

    Args:
        data: DataFrame with 'ORIGIN_AIRPORT' and 'DESTINATION_AIRPORT'
            columns; each row contributes the edge origin -> destination.

    Returns:
        nx.DiGraph whose nodes are the values found in those two columns.
        Rows that repeat the same pair map onto the same single edge.
    """
    G = nx.DiGraph()
    # Hand both columns to networkx in one call instead of looping with
    # iterrows(), which builds a Series per row and is slow on large tables.
    G.add_edges_from(zip(data['ORIGIN_AIRPORT'], data['DESTINATION_AIRPORT']))
    return G

graph = create_directed_graph(data)

# Betweenness centrality of every airport in the graph
betweenness = nx.betweenness_centrality(graph)

# Table of the centrality scores, one row per airport
centrality_df = pd.DataFrame({
    'Airport': list(betweenness.keys()),
    'Betweenness_Centrality': list(betweenness.values()),
})

# Mean departure delay per origin airport
delay_data = (
    data.groupby('ORIGIN_AIRPORT')['DEPARTURE_DELAY']
    .mean()
    .reset_index()
    .rename(columns={
        'ORIGIN_AIRPORT': 'Airport',
        'DEPARTURE_DELAY': 'Average_Departure_Delay',
    })
)

# Join the two tables on the airport code (inner join, the merge default)
merged_data = centrality_df.merge(delay_data, on='Airport')

# Show the combined table
print(merged_data)

# Scatter plot comparing centrality with the average delay
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=merged_data,
    x='Betweenness_Centrality',
    y='Average_Departure_Delay',
).set(
    xlabel='Betweenness Centrality',
    ylabel='Average Departure Delay',
    title='Betweenness Centrality vs. Average Departure Delay',
)
plt.show()

# Correlation between the two columns
correlation = merged_data['Betweenness_Centrality'].corr(
    merged_data['Average_Departure_Delay']
)
print(f'Correlation between Betweenness Centrality and Average Departure Delay: {correlation}')

H2

In [None]:
import networkx as nx
from networkx.algorithms.community import girvan_newman
import community.community_louvain as community_louvain
import pandas as pd
import matplotlib.pyplot as plt
from itertools import cycle

# Split the graph into communities with the Girvan-Newman algorithm
def detect_girvan_newman_communities(graph):
    """Return the first partition produced by ``girvan_newman`` for ``graph``."""
    # The result of girvan_newman is used as an iterator; only its first
    # item is taken.
    return next(girvan_newman(graph))

# Split the graph into communities with the Louvain algorithm
def detect_louvain_communities(graph, random_state=None):
    """Group the nodes of ``graph`` into Louvain communities.

    Args:
        graph: graph passed to ``community_louvain.best_partition``.
        random_state: optional seed forwarded to ``best_partition``. The
            Louvain implementation is randomised, so pass a fixed value to
            get the same partition on every run. The default ``None`` keeps
            the previous, unseeded behaviour.

    Returns:
        A list of communities, each a list of nodes. Communities appear in
        the order their id is first met in the partition mapping.
    """
    partition = community_louvain.best_partition(graph, random_state=random_state)
    communities = {}
    for node, community_id in partition.items():
        # setdefault creates the member list the first time an id is seen.
        communities.setdefault(community_id, []).append(node)
    return list(communities.values())

# Build the (undirected) graph used for each month
def create_graph(data):
    """Build an undirected graph with an edge for every origin/destination pair.

    Args:
        data: DataFrame with 'ORIGIN_AIRPORT' and 'DESTINATION_AIRPORT'
            columns; each row contributes one edge between the two airports.

    Returns:
        nx.Graph whose nodes are the values found in those two columns.
        Rows that repeat the same pair map onto the same single edge.
    """
    G = nx.Graph()
    # Hand both columns to networkx in one call instead of looping with
    # iterrows(), which builds a Series per row and is slow on large tables.
    G.add_edges_from(zip(data['ORIGIN_AIRPORT'], data['DESTINATION_AIRPORT']))
    return G

# Read the file
file_path = 'flights_cleaned.csv'
data = pd.read_csv(
    file_path,
    dtype={'ORIGIN_AIRPORT': str, 'DESTINATION_AIRPORT': str},
    low_memory=False,
)

# Rows of each month, selected by MONTH value 1, 2 and 3
data_january, data_february, data_march = (
    data[data['MONTH'] == month_number] for month_number in (1, 2, 3)
)

# One graph per month
graph_january, graph_february, graph_march = (
    create_graph(monthly_data)
    for monthly_data in (data_january, data_february, data_march)
)

# Check the number of nodes and edges in each month
print(f"January: {graph_january.number_of_nodes()} nodes, {graph_january.number_of_edges()} edges")
print(f"February: {graph_february.number_of_nodes()} nodes, {graph_february.number_of_edges()} edges")
print(f"March: {graph_march.number_of_nodes()} nodes, {graph_march.number_of_edges()} edges")

# Detect the communities of each month with Girvan-Newman
communities_january_gn, communities_february_gn, communities_march_gn = (
    detect_girvan_newman_communities(monthly_graph)
    for monthly_graph in (graph_january, graph_february, graph_march)
)

num_communities_january_gn, num_communities_february_gn, num_communities_march_gn = (
    len(list(found))
    for found in (communities_january_gn, communities_february_gn, communities_march_gn)
)

print(f"Girvan-Newman - January: {num_communities_january_gn} communities")
print(f"Girvan-Newman - February: {num_communities_february_gn} communities")
print(f"Girvan-Newman - March: {num_communities_march_gn} communities")

# Detect the communities of each month with Louvain
communities_january_louvain, communities_february_louvain, communities_march_louvain = (
    detect_louvain_communities(monthly_graph)
    for monthly_graph in (graph_january, graph_february, graph_march)
)

num_communities_january_louvain, num_communities_february_louvain, num_communities_march_louvain = (
    len(found)
    for found in (communities_january_louvain, communities_february_louvain, communities_march_louvain)
)

print(f"Louvain - January: {num_communities_january_louvain} communities")
print(f"Louvain - February: {num_communities_february_louvain} communities")
print(f"Louvain - March: {num_communities_march_louvain} communities")

# Draw a graph with its nodes coloured by community
def plot_communities(graph, communities, title):
    """Draw ``graph`` with the nodes of each community in one colour.

    Args:
        graph: graph to draw; its spring layout gives the node positions.
        communities: iterable of node collections, one per community.
        title: text used as the figure title.
    """
    layout = nx.spring_layout(graph)
    plt.figure(figsize=(12, 8))

    # Seven colours that repeat when there are more than seven communities.
    palette = cycle(['r', 'g', 'b', 'c', 'm', 'y', 'k'])

    for members, shade in zip(communities, palette):
        nx.draw_networkx_nodes(graph, layout, nodelist=members, node_size=50, node_color=shade)
    nx.draw_networkx_edges(graph, layout, alpha=0.5)
    plt.title(title)
    plt.show()

# Visualise the communities: Girvan-Newman results first, then Louvain
for month_graph, month_communities, plot_title in (
    (graph_january, communities_january_gn, 'Communities in January (Girvan-Newman)'),
    (graph_february, communities_february_gn, 'Communities in February (Girvan-Newman)'),
    (graph_march, communities_march_gn, 'Communities in March (Girvan-Newman)'),
    (graph_january, communities_january_louvain, 'Communities in January (Louvain)'),
    (graph_february, communities_february_louvain, 'Communities in February (Louvain)'),
    (graph_march, communities_march_louvain, 'Communities in March (Louvain)'),
):
    plot_communities(month_graph, month_communities, plot_title)

H3

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

# Read the flights file, handling mixed-type values in the airport columns
file_path = 'flights_cleaned.csv'  # make sure the file name is correct
data = pd.read_csv(
    file_path,
    # Both airport-code columns are read as str.
    dtype=dict.fromkeys(('ORIGIN_AIRPORT', 'DESTINATION_AIRPORT'), str),
    low_memory=False,
)

# Build a directed graph from the flight records
def create_directed_graph(data):
    """Build a directed graph with an edge for every origin -> destination pair.

    Args:
        data: DataFrame with 'ORIGIN_AIRPORT' and 'DESTINATION_AIRPORT'
            columns; each row contributes the edge origin -> destination.

    Returns:
        nx.DiGraph whose nodes are the values found in those two columns.
        Rows that repeat the same pair map onto the same single edge.
    """
    G = nx.DiGraph()
    # Hand both columns to networkx in one call instead of looping with
    # iterrows(), which builds a Series per row and is slow on large tables.
    G.add_edges_from(zip(data['ORIGIN_AIRPORT'], data['DESTINATION_AIRPORT']))
    return G

graph = create_directed_graph(data)

# Closeness centrality of every airport in the graph
closeness = nx.closeness_centrality(graph)

# Table of the centrality scores, one row per airport
centrality_df = pd.DataFrame({
    'Airport': list(closeness.keys()),
    'Closeness_Centrality': list(closeness.values()),
})

# Number of flights per airport: departures plus arrivals
origin_counts = data['ORIGIN_AIRPORT'].value_counts()
destination_counts = data['DESTINATION_AIRPORT'].value_counts()
total_counts = (
    # fill_value=0 keeps airports that appear in only one of the two counts
    origin_counts.add(destination_counts, fill_value=0)
    .rename_axis('Airport')
    .rename('Total_Flights')
    .reset_index()
)

# Join the two tables on the airport code (inner join, the merge default)
merged_data = centrality_df.merge(total_counts, on='Airport')

# Scatter plot of closeness centrality against the number of flights
plt.figure(figsize=(12, 6))
sns.scatterplot(
    data=merged_data,
    x='Closeness_Centrality',
    y='Total_Flights',
).set(
    title='Closeness Centrality vs Total Flights',
    xlabel='Closeness Centrality',
    ylabel='Total Flights',
)
plt.show()

# Correlation between the two columns
correlation = merged_data['Closeness_Centrality'].corr(
    merged_data['Total_Flights']
)
print(f'Correlation between Closeness Centrality and Total Flights: {correlation}')