In [None]:
# check the cars that have a disabled parking permit
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.stats.proportion import proportions_ztest

#import sys
#sys.path.append('/Users/dpelleg/miniconda/lib/python3.10/site-packages')

# TODO: is the commented-out sys.path tweak in this cell still needed? If so, how should the path change be persisted?


In [None]:
# Directory (relative path) that the input CSVs are read from and that
# derived CSVs such as power_counts.csv are written to.
datadir = '../data/'

def count_repeated_chars(string, k):
    """Count the positions at which a run of ``k`` identical characters starts.

    Windows overlap, so ``"aaa"`` contains two runs of length 2.
    """
    window_starts = range(len(string) - (k - 1))
    return sum(
        1
        for start in window_starts
        if string[start:start + k] == string[start] * k
    )

def is_palindrome(string):
    """Return True when ``string`` reads the same forwards and backwards."""
    mirrored = string[::-1]
    return string == mirrored

def has_numeric_sequence_3(string):
    """Return True if ``string`` contains three adjacent digits that ascend by one.

    For example ``"123"`` inside ``"91234"``. Characters are converted with
    ``int()``, so a non-digit character that gets inspected raises ValueError.
    """
    triples = zip(string, string[1:], string[2:])
    return any(
        int(first) + 1 == int(second) and int(second) + 1 == int(third)
        for first, second, third in triples
    )

def has_numeric_sequence_3_rev(string):
    """Return True if ``string`` contains three adjacent digits that descend by one.

    Implemented by reversing the string and delegating to
    ``has_numeric_sequence_3``.
    """
    backwards = string[::-1]
    return has_numeric_sequence_3(backwards)

def count_distinct_chars(string):
    """Return how many different characters occur in ``string``."""
    unique_chars = set(string)
    return len(unique_chars)

def distinct_chars_under(s, n):
    """Return True when ``s`` uses at most ``n`` distinct characters.

    Despite the name, the bound is inclusive (``<=``).
    """
    n_distinct = count_distinct_chars(s)
    return n_distinct <= n

def distinct_chars_over(s, n):
    """Return True when ``s`` uses at least ``n`` distinct characters.

    Despite the name, the bound is inclusive (``>=``).
    """
    n_distinct = count_distinct_chars(s)
    return n_distinct >= n

def count_repeated_sequences(string, seq_length):
    """Count start positions whose ``seq_length``-long slice occurs again later.

    "Later" means entirely after the slice itself, so the second occurrence
    never overlaps the first.
    """
    last_start = len(string) - seq_length
    repeats = 0
    for start in range(last_start + 1):
        stop = start + seq_length
        chunk, remainder = string[start:stop], string[stop:]
        if chunk in remainder:
            repeats += 1
    return repeats

# Scoring rules: each key is a feature function taking the string and
# returning a bool or a count; each value is the weight that score()
# multiplies the feature's result by.
scoremap = dict([
    # Runs of identical characters (counts; a long run is counted by several rules).
    (lambda s: count_repeated_chars(s, 2), 1),
    (lambda s: count_repeated_chars(s, 3), 1),
    (lambda s: count_repeated_chars(s, 4), 1),
    # Whole-string patterns.
    (is_palindrome, 5),
    (has_numeric_sequence_3, 2),
    (has_numeric_sequence_3_rev, 2),
    # Few distinct characters (thresholds are cumulative: 1 distinct char hits all three).
    (lambda s: distinct_chars_under(s, 1), 10),
    (lambda s: distinct_chars_under(s, 2), 5),
    (lambda s: distinct_chars_under(s, 3), 2),
#    (lambda s: distinct_chars_over(s, 5), 2),
#    (lambda s: distinct_chars_over(s, 6), 5),
#    (lambda s: distinct_chars_over(s, 7), 6),
#    (lambda s: distinct_chars_over(s, 8), 8),
    # Sub-sequences that occur again later in the string (counts).
    (lambda s: count_repeated_sequences(s, 2), 2),
    (lambda s: count_repeated_sequences(s, 3), 3),
])

def score(s):
    """Return the weighted sum of every ``scoremap`` feature evaluated on ``s``.

    Each feature result is converted with ``int()`` (True -> 1, False -> 0,
    counts unchanged) and multiplied by the feature's weight.
    """
    return sum(weight * int(feature(s)) for feature, weight in scoremap.items())

In [None]:
def read_file(fname, enc='iso8859-8'):
    """Read a pipe-separated text file into a DataFrame.

    Parameters
    ----------
    fname : str or path-like
        File to read.
    enc : str, default 'iso8859-8'
        Text encoding used to decode the file. Bytes that cannot be decoded
        are replaced (``errors='replace'``) instead of raising.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``|`` as the field separator.
    """
    # The context manager closes the handle even when parsing fails;
    # previously the file object was opened and never closed.
    with open(fname, encoding=enc, errors='replace') as fd:
        return pd.read_csv(fd, sep='|')

def add_model(df):
    """Add a ``model`` key column to ``df`` in place.

    The key is the values of ``tozeret_cd``, ``degem_cd``, ``shnat_yitzur``
    and ``sug_degem`` joined with ``_``. All four columns must hold strings.

    The columns are concatenated as whole Series rather than row by row with
    ``DataFrame.apply(axis=1)``. This avoids a Python-level call per row and
    also works on an empty frame, where the row-wise version failed because
    ``apply`` returned a DataFrame instead of a Series. A missing value in
    any of the four columns yields a missing ``model`` for that row.

    Returns None; ``df`` is modified.
    """
    key_columns = ['tozeret_cd', 'degem_cd', 'shnat_yitzur', 'sug_degem']
    model = df[key_columns[0]]
    for col in key_columns[1:]:
        model = model + '_' + df[col]
    df['model'] = model

def get_model_name(ns):
    """Look up the ``tozeret_nm`` and ``kinuy_mishari`` columns for model keys.

    ``ns`` is a sequence of ``model`` keys. It is left-joined against the
    module-level ``models`` DataFrame on ``model``, so the result has one row
    per input key (more if ``models`` holds duplicate keys) and missing values
    where a key is unknown.

    NOTE(review): ``models`` is not defined in the cells visible here; it must
    exist before this function is called.
    """
    lookup = pd.DataFrame({'model': ns})
    joined = lookup.merge(models, how='left', on='model')
    return joined[['tozeret_nm', 'kinuy_mishari']]

def ztest_proportions(df, population, test):
    """Return the p-value of a z-test on proportions of True in ``df[test]``.

    Rows are grouped by ``df[population]``; for each group the number of rows
    where ``df[test]`` is True and the group's total row count are passed to
    ``proportions_ztest``. Only the p-value is returned.
    """
    contingency = pd.crosstab(df[population], df[test])
    successes = contingency[True].to_list()
    totals = contingency.sum(axis=1).to_list()
    # proportions_ztest returns (test statistic, p-value); the statistic is unused.
    _zstat, pval = proportions_ztest(successes, totals)
    return pval

In [None]:
# source : https://data.gov.il/dataset/private-and-commercial-vehicles/resource/053cea08-09bc-40ec-8f7a-156f0677aff3?inner_span=True

#df = pd.read_csv('data/rechev-small.csv', encoding='iso-8859-1', sep='|')
cars = read_file(datadir + 'rechev.csv')

# Treat the identifier-like columns as text so they can be joined and
# compared as strings later on.
id_columns = ['mispar_rechev', 'degem_cd', 'tozeret_cd', 'shnat_yitzur']
cars[id_columns] = cars[id_columns].astype(str)

# Parse moed_aliya_lakvish as year-month and re-emit it as a zero-padded
# 'YYYY-MM' string, which sorts chronologically when compared as text.
parsed_month = pd.to_datetime(cars['moed_aliya_lakvish'], format='%Y-%m')
cars['time'] = parsed_month.dt.strftime('%Y-%m')

In [None]:
# Keep only the rows whose sug_delek_nm is 'חשמל'; copy so that columns
# added to elec_cars later do not touch `cars`.
elec_cars = cars.loc[cars['sug_delek_nm'] == 'חשמל'].copy()

In [None]:
# Count electric cars per kinuy_mishari for rows with shnat_yitzur >= "2022".
# shnat_yitzur was cast to str when loading, so this is a string comparison.
elec_cars.query('shnat_yitzur >= "2022"')['kinuy_mishari'].value_counts()

In [None]:
# Lookup table that is merged onto elec_cars via its `model` column.
# Presumably it supplies the `capacity` and `max_power` columns used
# further down; verify against the CSV.
# Read through `datadir` like every other file in this notebook instead of
# repeating the hard-coded path.
capacities = pd.read_csv(datadir + 'capacities.csv')
capacities

In [None]:
# Attach the capacities row whose `model` equals each car's kinuy_mishari;
# cars without a match keep missing values (left join).
# NOTE(review): this rebinds elec_cars to the merged frame, so re-running the
# cell merges capacities a second time.
elec_cars = elec_cars.merge(
    capacities,
    how='left',
    left_on='kinuy_mishari',
    right_on='model',
)

# Number of cars per max_power value, ordered by max_power, saved as CSV.
power_counts = (
    elec_cars['max_power']
    .value_counts()
    .sort_index()
    .rename_axis('max_power')
    .reset_index(name='count')
)
power_counts.to_csv(datadir + 'power_counts.csv', index=False)

In [None]:
# Display the max_power -> number-of-cars table.
power_counts

In [None]:
# numpy is imported in the notebook's import cell rather than here, so all
# dependencies are declared in one place.
# Expand the (max_power, count) table back into one value per car, so the
# empirical CDF is weighted by the number of cars at each power level.
all_max_powers = np.repeat(power_counts['max_power'].to_list(), power_counts['count'].to_list())
sns.ecdfplot(all_max_powers)
plt.title('CDF of max_power')
plt.show()

In [None]:
# Count electric cars per kinuy_mishari among rows with time == '2022-08'.
elec_cars.query("time == '2022-08'")['kinuy_mishari'].value_counts()

In [None]:
# Number of cars per `time` month whose kinuy_mishari is 'IONIQ5', sorted by month.
elec_cars.query("kinuy_mishari == 'IONIQ5'")['time'].value_counts().sort_index()

In [None]:
# Per-month aggregates for rows with time > '2020-01' (a string comparison
# on 'YYYY-MM', so 2020-01 itself is excluded).
df = (
    elec_cars
    .query("time > '2020-01'")
    .groupby('time')
    .agg(
        mean_capacity=('capacity', 'mean'),
        # 'size' counts every row in the month, including rows whose
        # capacity is missing.
        size_capacity=('capacity', 'size'),
        mean_max_power=('max_power', 'mean'),
    )
)

# Monthly total = mean capacity times number of cars in that month.
df['total_capacity'] = df['mean_capacity'] * df['size_capacity']

In [None]:
# Last rows (latest months) of the monthly aggregate table.
df.tail()

In [None]:
# Total battery capacity: the monthly total_capacity values summed over all
# months in df.
column_sums = df.sum()
tot_bat_capacity = column_sums.total_capacity

# Capacity multiplied by 365.
# NOTE(review): presumably one full battery cycle per day for a year - confirm.
annual_capacity_100_util = tot_bat_capacity * 365

# NOTE(review): 0.3*(10**9) is an unexplained constant; presumably a yearly
# energy figure in the same unit as `capacity` - confirm its source and unit.
utilization = (0.3*(10**9)) / annual_capacity_100_util
print(f"{utilization}, {annual_capacity_100_util}")

In [None]:
def sparsify_xticks(ax, spacing=10):
    """Show only every ``spacing``-th x tick label of ``ax`` and hide the rest.

    The label at position 0 is always shown.
    """
    for position, tick_label in enumerate(ax.get_xticklabels()):
        tick_label.set_visible(position % spacing == 0)

def rev(s):
    """Return ``s`` with its characters in reverse order.

    Applied to the Hebrew plot labels in this notebook; presumably this
    compensates for matplotlib drawing text left-to-right - confirm.
    """
    backwards = s[::-1]
    return backwards

# Draw an orange frame around the chart
def nice_border():
    """Make every spine of the current axes visible, orange and 3 wide."""
    current_axes = plt.gca()
    for side in current_axes.spines.values():
        side.set_visible(True)
        side.set_color('orange')
        side.set_linewidth(3)  # border width

sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(10, 6))

# Mean battery capacity per month (no smoothing is applied), drawn as a
# width-3 line with round markers.
sns.lineplot(data=df, x='time', y='mean_capacity',
             linewidth=3, marker='o', markersize=8, ax=ax)

ax.set_title(rev("קיבולת סוללה ממוצעת לפי מכירות בישראל"))
ax.set_xlabel(rev('תאריך'))
ax.set_ylabel('(kWh) ' + rev('קיבולת'))
sparsify_xticks(ax, 8)
nice_border()
fig.tight_layout()  # Adjust layout for better spacing
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Mean max_power per month, drawn as a width-3 line with round markers.
sns.lineplot(data=df, x='time', y='mean_max_power',
             linewidth=3, marker='o', markersize=8, ax=ax)

ax.set_title(rev("זרם טעינה מקסימלי לפי מכירות בישראל"))
ax.set_xlabel(rev('תאריך'))
ax.set_ylabel('(kW) ' +rev('זרם'))
sparsify_xticks(ax, 8)
nice_border()
fig.tight_layout()  # Adjust layout for better spacing
plt.show()

In [None]:
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(10, 6))

# Total battery capacity per month (mean capacity x number of cars), drawn
# as a width-3 line with round markers.
sns.lineplot(data=df, x='time', y='total_capacity',
             linewidth=3, marker='o', markersize=8, ax=ax)

ax.set_title(rev("קיבולת סוללה כוללת נמכרת בישראל"))
ax.set_xlabel(rev('תאריך'))
ax.set_ylabel('(MWh) ' + rev('קיבולת'))
sparsify_xticks(ax, 8)
nice_border()

# Show tick values divided by 1000 to match the (MWh) axis label.
# NOTE(review): assumes total_capacity is in kWh - confirm.
y_formatter = ticker.FuncFormatter(lambda x, pos: f'{x/1000:.0f}')
ax.yaxis.set_major_formatter(y_formatter)

fig.tight_layout()  # Adjust layout for better spacing

plt.show()

In [None]:
# Popular models: the ten most frequent kinuy_mishari values among electric cars.
model = 'kinuy_mishari'
pop_models = elec_cars[model].value_counts().index[:10].to_list()
#elec_cars['time'].value_counts().sort_index(ascending=False).head(10)

In [None]:
# Display the list of the ten most frequent model names.
pop_models

In [None]:
# count model sales over time
# Restrict to the popular models and count cars per (month, model).
# NOTE(review): this rebinds `df`, which earlier held the monthly aggregate
# table; the plotting cells above need that table rebuilt before a re-run.
df = elec_cars[elec_cars['kinuy_mishari'].isin(pop_models)]
count_df = (
    df.groupby(['time', 'kinuy_mishari'])
    .size()
    .unstack('kinuy_mishari')
)
#count_df = count_df.div(count_df.sum(axis=1), axis=0)

In [None]:
# Last rows (latest months) of the month x model count table.
count_df.tail()

In [None]:
# Stacked bar chart of cars per model for months after 2022-01
# (string comparison on the 'time' index, so 2022-01 itself is excluded).
count_df.query('time > "2022-01"').plot(kind='bar', stacked=True, figsize=(10, 6))

In [None]:
# Ten most frequent values of the `model` column (set to 'kinuy_mishari'
# above) among electric cars with time == '2023-08'.
elec_cars.query("time == '2023-08'")[model].value_counts().head(10)

In [None]:
# Share of each sug_delek_nm value among all cars, per month: the
# (month x fuel) counts divided by each month's row total.
# NOTE(review): this rebinds count_df, which previously held the per-model counts.
count_df = (
    cars.groupby(['time', 'sug_delek_nm'])
    .size()
    .unstack()
    .pipe(lambda counts: counts.div(counts.sum(axis=1), axis=0))
)

In [1]:
#count_df.query('time > "2021-01"').plot(kind='bar', stacked=True, figsize=(10, 6))