In [17]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import random
import collections

In [4]:
# Fixed seed so the generated series is the same on every run.
random.seed(42)

# 100 observation indices paired with 100 pseudo-random integers in [0, 100].
sequence = range(0, 100)
# Materialise the values as a list: a generator expression would be consumed
# by the DataFrame constructor, leaving `data` exhausted for any later use or
# for re-running the DataFrame line on its own.
data = [random.randint(0, 100) for _ in sequence]

df = pd.DataFrame({'data': data, 'sequence': sequence})
df.head()

Unnamed: 0,data,sequence
0,81,0
1,14,1
2,3,2
3,94,3
4,35,4


In [63]:
# Line-and-marker chart of the raw series: observation index on x, value on y.
go.Figure(
    data=go.Scatter(
        x=df['sequence'],
        y=df['data'],
        mode='lines+markers',
        marker={'size': 8, 'color': 'dimgrey'},
    )
)

In [9]:
def get_turning_points(data):
    """Flag the interior points of ``data`` that are strict local extrema.

    A point is a turning point when the difference leading into it and the
    difference leading out of it have opposite signs (their product is
    negative). A zero difference on either side therefore never counts.

    Parameters
    ----------
    data : array-like
        One-dimensional sequence of numbers.

    Returns
    -------
    numpy.ndarray
        Boolean array with one entry per interior point: element ``i``
        describes ``data[i + 1]``. The first and last points of ``data`` are
        not represented; inputs shorter than three items give an empty array.
    """
    dx = np.diff(data)
    # Pair each difference with the one immediately before it. The second
    # slice has to be ``dx[:-1]``: ``dx[:1]`` is only the first difference and
    # would be broadcast against every other one.
    return (dx[1:] * dx[:-1]) < 0


def get_turning_points_another_way(data):
    """Flag the interior points of ``data`` that are strict local extrema.

    A point is flagged when it is strictly greater than both of its
    neighbours or strictly smaller than both. The neighbours are compared
    directly instead of through products of the values, so very large or very
    small floats cannot overflow or underflow and hide an extremum.

    Parameters
    ----------
    data : sequence
        Sliceable sequence of numbers (e.g. a list).

    Returns
    -------
    list
        One truth value per interior point: element ``i`` describes
        ``data[i + 1]``. Inputs shorter than three items give ``[]``.
    """
    return [
        (mid > left and mid > right) or (mid < left and mid < right)
        for left, mid, right in zip(data[:-2], data[1:-1], data[2:])
    ]

In [10]:
# Flag the interior observations with get_turning_points_another_way.
# (get_turning_points(list(df.data)) is the alternative implementation.)
t_points = np.array(get_turning_points_another_way(df['data'].tolist()))

# The detector returns no flag for the first and last observation, so add a
# False at each end to line the flags up with the rows of df.
t_points = np.concatenate(([False], t_points, [False]))
df['t_points'] = t_points

In [6]:
def set_color(x):
    """Return the colour name for a flag: "orange" when ``x == 1``, otherwise "white"."""
    return "orange" if x == 1 else "white"

In [62]:
fig = go.Figure()

# Base line through every observation; kept out of the legend.
fig.add_trace(go.Scatter(x=df['sequence'], y=df['data'], showlegend=False))

# One marker trace per class so each gets its own colour and legend entry.
for label, flag, colour in (
    ('turning points', True, 'deeppink'),
    ('not a turning points', False, 'dimgrey'),
):
    subset = df[df['t_points'] == flag]
    fig.add_trace(go.Scatter(
        name=label,
        x=subset['sequence'],
        y=subset['data'],
        mode='markers',
        marker={'size': 8, 'color': colour},
    ))

fig.update_layout(showlegend=True, height=550)
fig

In [60]:
def is_random_dataset(df, n_std=1):
    """Turning-point test: is the number of turning points consistent with a random series?

    For ``n`` observations the function uses ``2 * (n - 2) / 3`` as the
    expected number of turning points and ``(16 * n - 29) / 90`` as its
    variance, prints these statistics together with the observed count, and
    checks whether the count lies within ``n_std`` standard deviations of the
    expected value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a boolean ``'t_points'`` column marking the turning points;
        ``len(df)`` is taken as the number of observations.
    n_std : float, default 1
        Half-width of the acceptance band in standard deviations. The default
        reproduces the original fixed one-sigma band.

    Returns
    -------
    bool
        ``True`` when the observed count falls inside the band (inclusive).
        For frames with fewer than two rows the variance formula is negative,
        the standard deviation is NaN and the result is ``False``.
    """
    n = len(df)

    mean = 2 * (n - 2) / 3
    dispersion = (16 * n - 29) / 90
    std = np.sqrt(dispersion)
    # Count once and reuse for both the report and the decision.
    t_points = int(df['t_points'].sum())

    print(f'Mean: {mean}\n'
          f'Dispersion: {dispersion}\n'
          f'Std: {std}\n\n'
          f'Mean - Std: {mean - std}\n'
          f'Mean + Std: {mean + std}\n'
          f'T_points: {t_points}'
    )

    # bool() so callers get a plain Python bool rather than a numpy scalar.
    return bool(mean - n_std * std <= t_points <= mean + n_std * std)

In [61]:
# Run the turning-point check on the generated series: prints the summary
# statistics and displays the boolean verdict as the cell output.
is_random_dataset(df)

Mean: 65.33333333333333
Dispersion: 17.455555555555556
Std: 4.1779846284489315

Mean - Std: 61.155348704884396
Mean + Std: 69.51131796178225
T_points: 64


True