In [1]:
!pip install streamlit

Collecting streamlit
[?25l  Downloading https://files.pythonhosted.org/packages/dd/8d/4c7676d01e90852254e2275fb4639b747274430f2fa066aa94848d3a6ee4/streamlit-0.73.1-py2.py3-none-any.whl (7.4MB)
[K     |████████████████████████████████| 7.4MB 4.0MB/s 
Collecting blinker
[?25l  Downloading https://files.pythonhosted.org/packages/1b/51/e2a9f3b757eb802f61dc1f2b09c8c99f6eb01cf06416c0671253536517b6/blinker-1.4.tar.gz (111kB)
[K     |████████████████████████████████| 112kB 46.4MB/s 
Collecting watchdog
[?25l  Downloading https://files.pythonhosted.org/packages/83/d9/3d1f46b428fd7b646725896b58d2eddb84f79fd76912773e6193cf74263d/watchdog-1.0.2-py3-none-manylinux2014_x86_64.whl (72kB)
[K     |████████████████████████████████| 81kB 9.6MB/s 
Collecting gitpython
[?25l  Downloading https://files.pythonhosted.org/packages/24/d1/a7f8fe3df258549b303415157328bfcc63e9b11d06a7ad7a3327f3d32606/GitPython-3.1.11-py3-none-any.whl (159kB)
[K     |████████████████████████████████| 163kB 44.3MB/s 
[?25hC

In [3]:
!pip install pyngrok==4.1.1



In [11]:
%%writefile app.py

import streamlit as st
import time
from tools import *

st.title("Utkonos Data Set")
df = get_df(10000)
df = data_preprocessing(df)

if st.checkbox("Показать выборку"):
    species = st.write('Классы',df['CancelFlag'].unique())
    st.markdown('Признаки:')
    st.write(df.columns)

    st.markdown('Выборка')
    st.write(df.head())

pie1, pie2 = get_pies(df)

if st.checkbox("Показать график в зависимости от время доставки"):
    st.plotly_chart(pie1)

if st.checkbox("Показать график в зависимости от предоплаты"):
    st.plotly_chart(pie2)

X, y = get_X_y(df)
X_best_cols = get_K_best_features(X, y).tolist()

if st.checkbox("Показать 10 наиболее важных признаков (f-value)"):
    st.write(X_best_cols)

X_train, X_test, y_train, y_test = get_train_test_scaled(X, y)

algs = ['None','Decision Tree','KNN', 'Random Forest', 'Bagging']
models = {'Decision Tree': get_decision_tree,
          'KNN': get_knn_model,
          'Random Forest': get_random_forest,
          'Bagging': get_bagging}
classifier = st.selectbox('Выберите алгоритм', algs)

if st.button('Запустить модель'):
    if classifier == 'None':
        st.warning('Пожалуйста выберите модель {}'.format(algs[1:]))

    else:
      train_time = time.time()
      model = models[classifier](X_train, y_train)
      acc = model.score(X_test,y_test)
      st.write('Accuracy: ', acc)

      predictions = model.predict(X_test)
      confusion = confusion_matrix(y_test,predictions)
      st.write('Confusion Matrix: ',confusion)

      fig = get_roc_auc(model, X_test, y_test)
      st.plotly_chart(fig)
      ed_time = time.time()
      st.write('Времени потрачено на подсчет результата: ',ed_time-train_time)

Overwriting app.py


In [None]:
!ls

app.py	drive  __pycache__  sample_data  tools.py


In [6]:
!ngrok authtoken 1m8pKKOfayop9VVArkFZLpUeaUB_3gDK1j79pN81yKRrtLb1o

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
!ngrok

NAME:
   ngrok - tunnel local ports to public URLs and inspect traffic

DESCRIPTION:
    ngrok exposes local networked services behinds NATs and firewalls to the
    public internet over a secure tunnel. Share local websites, build/test
    webhook consumers and self-host personal services.
    Detailed help for each command is available with 'ngrok help <command>'.
    Open http://localhost:4040 for ngrok's web interface to inspect traffic.

EXAMPLES:
    ngrok http 80                    # secure public URL for port 80 web server
    ngrok http -subdomain=baz 8080   # port 8080 available at baz.ngrok.io
    ngrok http foo.dev:80            # tunnel to host:port instead of localhost
    ngrok http https://localhost     # expose a local https server
    ngrok tcp 22                     # tunnel arbitrary TCP traffic to port 22
    ngrok tls -hostname=foo.com 443  # TLS traffic for foo.com to port 443
    ngrok start foo bar baz          # start tunnels from the configuration file

VERSI

In [12]:
# Start the Streamlit server for app.py in the background (trailing `&`) and
# discard everything it prints (`&>/dev/null`).
# NOTE(review): every re-run of this cell starts another server — the
# `pgrep streamlit` output later in this notebook lists several PIDs. Stop old
# servers with `killall streamlit` before re-running.
!streamlit run app.py &>/dev/null&

In [8]:
from pyngrok import ngrok

In [13]:
# Open an ngrok tunnel to local port 8501, where the Streamlit app is expected
# to be listening, and display the public URL through which it can be reached.
public_url = ngrok.connect(port='8501')
public_url

'http://1d2e07d822f1.ngrok.io'

In [None]:
!pgrep streamlit

1976
2050
2113
2193
2262
2309
2349


In [10]:
ngrok.kill()

In [None]:
!killall streamlit

In [None]:
def get_df(samples,
           path='/content/drive/MyDrive/Анализ_Данных_Соцкова_Вероника_11-802/Practice/balanced_utkonos.csv',
           random_state=None):
  """Load the data set and return a class-balanced random sample.

  The CSV is read, its 'Unnamed: 0' column becomes the index, rows with any
  missing value are dropped, and `samples` rows are drawn from each of the two
  `CancelFlag` classes (0 first, then 1).

  Args:
    samples: number of rows to draw from EACH class.
    path: location of the CSV file. Defaults to the original Google Drive path,
      so existing callers are unaffected.
    random_state: seed (or random state object) forwarded to
      `DataFrame.sample`; pass an int to get a reproducible sample. The default
      `None` keeps the original non-deterministic behaviour.

  Returns:
    DataFrame with `2 * samples` rows: the class-0 sample followed by the
    class-1 sample.

  Raises:
    ValueError: raised by pandas when a class has fewer than `samples` rows.
  """
  df = pd.read_csv(path, low_memory=False)
  # Restore the saved row labels as the index (without naming the index).
  df.index = df['Unnamed: 0'].values
  df = df.drop('Unnamed: 0', axis=1)
  df = df.dropna()

  per_class = [
      df[df['CancelFlag'] == flag].sample(samples, random_state=random_state)
      for flag in (0, 1)
  ]
  return pd.concat(per_class)

In [None]:
def data_preprocessing(df):
  """Derive model features from the raw order columns.

  Steps, in order:
    * split 'Interval' on '-' into integer `interval_low` / `interval_high`
      and their mean `interval_avg`;
    * add 0/1 flags `morning`, `day`, `evening`, `night` from `interval_avg`;
    * add `delta_day`, the number of days between 'OrderDate' and 'Date'
      (both parsed as day/month/year);
    * decrement `count_edit` by one;
    * encode 'DeliveryType' as 0/1 and label-encode 'Cluster';
    * drop 'Interval', 'ClientID', 'Date' and 'OrderDate'.

  The input frame is not modified; a processed copy is returned.

  Args:
    df: DataFrame containing the columns named above.

  Returns:
    A new DataFrame with the derived columns added and the raw ones removed.
  """
  # Work on a copy so the caller's frame stays intact and the function can be
  # called again on the same input.
  df = df.copy()

  bounds = df['Interval'].str.split('-')
  df['interval_low'] = [int(parts[0]) for parts in bounds]
  # The last character of the upper bound is stripped before conversion
  # (presumably a unit suffix — TODO confirm against the raw data).
  df['interval_high'] = [int(parts[1][:-1]) for parts in bounds]
  del df['Interval']

  df['interval_avg'] = (df['interval_high'] + df['interval_low']) / 2

  # Range comparisons rather than membership in a list of whole hours: the
  # average of two bounds can be a half hour (e.g. 10.5), which would otherwise
  # fall into no bucket at all.
  avg = df['interval_avg']
  df['morning'] = ((avg >= 6) & (avg < 12)).astype(int)
  df['day'] = ((avg >= 12) & (avg < 18)).astype(int)
  df['evening'] = ((avg >= 18) & (avg < 24)).astype(int)
  df['night'] = ((avg >= 24) | (avg < 6)).astype(int)

  order_date = pd.to_datetime(df['OrderDate'], format='%d/%m/%Y')
  delivery_date = pd.to_datetime(df['Date'], format='%d/%m/%Y')
  df['delta_day'] = (delivery_date - order_date).dt.days.astype(int).values

  df['count_edit'] -= 1

  df['DeliveryType'] = df['DeliveryType'].map({'Обычная доставка': 0, 'Доставка День в День': 1})

  df = df.drop(['ClientID', 'Date', 'OrderDate'], axis=1)

  df['Cluster'] = LabelEncoder().fit_transform(df['Cluster'])

  return df

In [None]:
def get_pies(df):
  """Build the two pie charts shown in the dashboard.

  Figure 1: for each time-of-day flag, the percentage of flagged orders that
  belong to the second `CancelFlag` group.
  Figure 2: how many rows with `CancelFlag == 1` fall into each `prepay` group.

  Args:
    df: DataFrame with 'CancelFlag', 'morning', 'day', 'evening', 'night' and
      'prepay' columns.

  Returns:
    Tuple `(fig1, fig2)` of Plotly Express pie figures.
  """
  period_cols = ['morning', 'day', 'evening', 'night']

  # Figure 1 — per-period sums for each CancelFlag group. Row 0 / row 1 are the
  # first / second group in groupby order (assumes exactly two flag values,
  # 0 and 1 — verify against the data).
  per_flag = df[['CancelFlag'] + period_cols].groupby('CancelFlag').sum()
  sums = per_flag.values
  second_group, first_group = sums[1], sums[0]
  share = second_group / (second_group + first_group)
  period_fig = px.pie(per_flag, values=share * 100, names=tuple(period_cols))

  # Figure 2 — row counts of cancelled orders per prepay group. The labels are
  # positional, so they assume two prepay groups ordered "without prepayment",
  # "with prepayment" (TODO confirm).
  by_prepay = df[df['CancelFlag'] == 1].groupby('prepay').count()
  prepay_labels = ('Отказывают без предоплаты', 'Отказывают с предоплатой')
  prepay_fig = px.pie(by_prepay, values=by_prepay['CancelFlag'].values, names=prepay_labels)

  return period_fig, prepay_fig

In [None]:
def get_X_y(df):
  """Split a frame into the feature matrix and the integer target.

  Args:
    df: DataFrame that contains a 'CancelFlag' column.

  Returns:
    Tuple `(X, y)`: `X` is `df` without 'CancelFlag', `y` is 'CancelFlag'
    cast to int. `df` itself is left unchanged.
  """
  target = df['CancelFlag'].astype(int)
  features = df.drop(columns='CancelFlag')
  return features, target

In [None]:
def get_K_best_features(X, y, k=10):
  """Return the names of the `k` best columns of `X` by `f_classif` score.

  Args:
    X: feature DataFrame.
    y: target values aligned with the rows of `X`.
    k: number of features to keep (default 10, the previously hard-coded
      value). A numeric `k` larger than the number of columns is reduced to
      that number; the string 'all' is passed through unchanged.

  Returns:
    pandas Index with the selected column names, in their order in `X`.
  """
  if k != 'all':
    # Never ask for more features than exist.
    k = min(k, X.shape[1])

  selector = SelectKBest(f_classif, k=k)
  selector.fit(X, y)

  return X.columns[selector.get_support(indices=True)]

In [None]:
def get_train_test_scaled(X, y, test_size=0.4, random_state=None):
  """Split the data into train/test parts and standardise the features.

  The scaler is fitted on the training part only and then applied to both
  parts, so no information from the test part is used for scaling.

  Args:
    X: feature matrix.
    y: target values aligned with `X`.
    test_size: forwarded to `train_test_split` (default 0.4, the previously
      hard-coded value).
    random_state: forwarded to `train_test_split`; pass an int for a
      reproducible split. The default `None` keeps the original
      non-deterministic behaviour.

  Returns:
    Tuple `(X_train_scaled, X_test_scaled, y_train, y_test)`.
  """
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state)

  scaler = StandardScaler()
  scaler.fit(X_train)

  return scaler.transform(X_train), scaler.transform(X_test), y_train, y_test

In [None]:
def get_roc_auc(model, X_test, y_test):
  """Build a Plotly area chart of the model's ROC curve.

  Args:
    model: fitted classifier exposing `predict_proba`.
    X_test: features to score.
    y_test: true labels for `X_test`.

  Returns:
    Plotly figure of TPR against FPR, with the AUC in the title and a dashed
    diagonal from (0, 0) to (1, 1).
  """
  # Column 1 of predict_proba is used as the score passed to roc_curve.
  positive_scores = model.predict_proba(X_test)[:, 1]
  fpr, tpr, _ = roc_curve(y_test, positive_scores)
  area_under_curve = auc(fpr, tpr)

  axis_titles = {'x': 'False Positive Rate', 'y': 'True Positive Rate'}
  fig = px.area(
      x=fpr,
      y=tpr,
      title='ROC Curve (AUC={:.4f})'.format(area_under_curve),
      labels=axis_titles,
      width=700,
      height=500,
  )

  # Dashed reference diagonal.
  fig.add_shape(type='line', line={'dash': 'dash'}, x0=0, x1=1, y0=0, y1=1)

  # Tie the y axis scale to the x axis and constrain the x axis to its domain.
  fig.update_yaxes(scaleanchor="x", scaleratio=1)
  fig.update_xaxes(constrain='domain')
  return fig

In [None]:
def get_knn_model(X_train, y_train):
  """Fit and return a k-nearest-neighbours classifier with 25 neighbours.

  Args:
    X_train: training features.
    y_train: training labels.

  Returns:
    The fitted `KNeighborsClassifier`.
  """
  # fit() returns the estimator itself, so the fitted model is returned directly.
  return KNeighborsClassifier(n_neighbors=25).fit(X_train, y_train)

In [None]:
def get_decision_tree(X_train, y_train):
  """Grid-search a decision tree and return the fitted search object.

  The search tries both split criteria against a fixed list of maximum depths
  using 5-fold cross-validation.

  Args:
    X_train: training features.
    y_train: training labels.

  Returns:
    The fitted `GridSearchCV` instance (not the bare tree).
  """
  depth_options = [4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 20, 30, 40, 50, 70, 90, 120, 150]
  search_space = {
      'criterion': ['gini', 'entropy'],
      'max_depth': depth_options,
  }
  search = GridSearchCV(DecisionTreeClassifier(), search_space, cv=5)
  # fit() returns the search object itself.
  return search.fit(X_train, y_train)

In [None]:
def get_random_forest(X_train, y_train):
  """Fit and return a random forest with depth-2 trees and a fixed seed (0).

  Args:
    X_train: training features.
    y_train: training labels.

  Returns:
    The fitted `RandomForestClassifier`.
  """
  forest_settings = {'max_depth': 2, 'random_state': 0}
  # fit() returns the estimator itself.
  return RandomForestClassifier(**forest_settings).fit(X_train, y_train)

In [None]:
def get_bagging(X_train, y_train):
  """Fit and return a bagging classifier created with default settings.

  Args:
    X_train: training features.
    y_train: training labels.

  Returns:
    The fitted `BaggingClassifier`.
  """
  # fit() returns the estimator itself.
  return BaggingClassifier().fit(X_train, y_train)