In [2]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

In [3]:
all_ratings = pd.read_csv('data/all_ratings.csv')
ratings_single_account = pd.read_csv('data/ratings_single_account.csv')
movies_with_ratings_single_account = pd.read_csv('data/movies_single_account.csv')

In [4]:
all_ratings.head()

Unnamed: 0,rating_id,userId,movieId,rating,rating_date
0,1,1,307,3.5,2009-10-27 09:00:21
1,2,1,481,3.5,2009-10-27 09:04:16
2,3,1,1091,1.5,2009-10-27 09:04:31
3,4,1,1257,4.5,2009-10-27 09:04:20
4,5,1,1449,4.5,2009-10-27 09:01:04


In [5]:
len(all_ratings)

27753444

In [6]:
ratings_single_account.head()

Unnamed: 0,rating_id,userId,movieId,rating,rating_date
0,19834829,202382,1,5.0,1997-05-20 06:50:22
1,25102792,256349,1,5.0,2017-08-24 03:49:46
2,15849334,161826,1,3.0,2016-12-17 15:13:43
3,21184239,216433,1,5.0,2015-11-19 04:07:21
4,4464337,45860,1,5.0,2001-11-24 12:06:59


In [7]:
len(ratings_single_account)

5620

In [8]:
movies_with_ratings_single_account.head()

Unnamed: 0,Movie Id,Amount,title,release_year
0,318,348,"Shawshank Redemption, The (1994)",1994
1,2858,113,American Beauty (1999),1999
2,2571,108,"Matrix, The (1999)",1999
3,356,71,Forrest Gump (1994),1994
4,1721,59,Titanic (1997),1997


In [9]:
len(movies_with_ratings_single_account)

1678

In [10]:
def return_list_of_binned_ratings(list_of_ratings, ratings):
    ratings_for_movie_binned_lst = len(list_of_ratings) * [0]
    
    for rating in ratings:
        index = list_of_ratings.index(rating)
        ratings_for_movie_binned_lst[index] += 1

    return dict(zip(list_of_ratings,ratings_for_movie_binned_lst))

In [11]:
common = all_ratings.merge(ratings_single_account,on=['rating_id'])
ratings_without_single_account = all_ratings[~all_ratings.rating_id.isin(common.rating_id)]
print(f'Die Zahl der Beiden listen Zusammenaddiert: {len(ratings_without_single_account) + len(ratings_single_account)} sollte mit der Zahl der ganzen Liste {len(all_ratings)} übereinstimmen')
ratings_without_single_account.head()

Die Zahl der Beiden listen Zusammenaddiert: 27753444 sollte mit der Zahl der ganzen Liste 27753444 übereinstimmen


Unnamed: 0,rating_id,userId,movieId,rating,rating_date
0,1,1,307,3.5,2009-10-27 09:00:21
1,2,1,481,3.5,2009-10-27 09:04:16
2,3,1,1091,1.5,2009-10-27 09:04:31
3,4,1,1257,4.5,2009-10-27 09:04:20
4,5,1,1449,4.5,2009-10-27 09:01:04


In [12]:
list_of_ratings = [5.0, 4.5, 4.0, 3.5, 3.0, 2.5, 2.0, 1.5, 1.0, 0.5]  
list_of_ratings = list_of_ratings[::-1]

binned_ratings_single_account = return_list_of_binned_ratings(list_of_ratings, ratings_single_account['rating'])
binned_ratings_without_single_accounts = return_list_of_binned_ratings(list_of_ratings, ratings_without_single_account['rating'])

In [13]:
print(binned_ratings_single_account)
print(binned_ratings_without_single_accounts)

{0.5: 164, 1.0: 194, 1.5: 84, 2.0: 255, 2.5: 175, 3.0: 714, 3.5: 442, 4.0: 1284, 4.5: 495, 5.0: 1813}
{0.5: 442224, 1.0: 886039, 1.5: 441270, 2.0: 1850372, 2.5: 1373244, 3.0: 5514954, 3.5: 3403918, 4.0: 7393426, 4.5: 2373055, 5.0: 4069322}


In [14]:
binned_ratings_single_account_percentage = {k: v / len(ratings_single_account) for k, v in binned_ratings_single_account.items()}
binned_ratings_without_single_accounts_percentage = {k: v / len(ratings_without_single_account) for k, v in binned_ratings_without_single_accounts.items()}

In [15]:
print(binned_ratings_single_account_percentage)
print(binned_ratings_without_single_accounts_percentage)

{0.5: 0.029181494661921707, 1.0: 0.03451957295373666, 1.5: 0.01494661921708185, 2.0: 0.045373665480427046, 2.5: 0.03113879003558719, 3.0: 0.12704626334519573, 3.5: 0.07864768683274022, 4.0: 0.22846975088967972, 4.5: 0.08807829181494661, 5.0: 0.3225978647686833}
{0.5: 0.015937249710103393, 1.0: 0.03193183724965244, 1.5: 0.015902868635753204, 2.0: 0.06668530116091266, 2.5: 0.04949015101148112, 3.0: 0.19875266615501094, 3.5: 0.12267333106913177, 4.0: 0.2664506593381881, 4.5: 0.0855222016688588, 5.0: 0.14665373400090762}


In [147]:
y = ['Ratings from Users with only one rating', 'Ratings from Users with multiple ratings']
i = 0
colors = px.colors.sequential.Viridis
fig = go.Figure(go.Bar())
for rating in list_of_ratings:
    if binned_ratings_single_account_percentage[rating] <= 0.02:
        text = 'none'
    else:
        text = 'inside'

    fig.add_trace(go.Bar(y=y,
        x=[binned_ratings_single_account_percentage[rating],
        binned_ratings_without_single_accounts_percentage[rating]],
        name=f'{rating} Points',
        showlegend=False,
        orientation='h',
        textangle=0,
        marker_color=colors[i],
        textposition=text,
        text=rating,
        insidetextanchor='middle'))
    i += 1

fig.add_annotation(x=0, y=0,
            text="1.5",
            showarrow=False,
            yshift=195,
            xshift=68)

fig.add_annotation(x=0, y=0,
            text="1.5",
            showarrow=False,
            yshift=60,
            xshift=55)

print(fig.data[1])#.textposition = 'none'


fig.update_layout(barmode='stack')
fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':['Ratings from Users with multiple ratings', 'Ratings from Users with only one rating']})
fig.show()

Bar({
    'insidetextanchor': 'middle',
    'marker': {'color': '#440154'},
    'name': '0.5 Points',
    'orientation': 'h',
    'showlegend': False,
    'text': '0.5',
    'textangle': 0,
    'textposition': 'inside',
    'x': [0.029181494661921707, 0.015937249710103393],
    'y': [Ratings from Users with only one rating, Ratings from Users with
          multiple ratings]
})


The goal ist to find suspicious activities in the 

In [99]:
from plotly.subplots import make_subplots
from ipywidgets import VBox

In [262]:
def calcuate_position_outside_annotations(dict_of_stacked_elements, list_of_ratings, threshold):
    sum = 0
    dict_position_to_small_ratings = {}
    for rating in list_of_ratings:
        if dict_of_stacked_elements[rating] >= threshold:
            sum += dict_of_stacked_elements[rating]
        else:
            dict_position_to_small_ratings[rating] = sum + binned_ratings_without_single_accounts_percentage[rating] / 2
            sum += dict_of_stacked_elements[rating]

    return dict_position_to_small_ratings

In [263]:
threshold = 0.02
descripion_only_one_ratings = ['Ratings from Users with only one rating']
descriont_multiple_ratings = ['Ratings from Users with multiple ratings']
i = 0
colors = px.colors.sequential.Viridis
fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
for rating in list_of_ratings:
    if binned_ratings_single_account_percentage[rating] <= threshold:
        text = 'none'
    else:
        text = 'inside'

    fig.add_trace(go.Bar(y=descripion_only_one_ratings,
        x=[binned_ratings_single_account_percentage[rating]],
        name=f'{rating} Points',
        showlegend=False,
        orientation='h',
        textangle=0,
        marker_color=colors[i],
        textposition=text,
        text=rating,
        insidetextanchor='middle'), 1, 1)
    i += 1

i = 0

for rating in list_of_ratings:
    if binned_ratings_without_single_accounts_percentage[rating] <= threshold:
        text = 'none'
    else:
        text = 'inside'

    fig.add_trace(go.Bar(y=descriont_multiple_ratings,
        x=[binned_ratings_without_single_accounts_percentage[rating]],
        name=f'{rating} Points',
        showlegend=False,
        orientation='h',
        textangle=0,
        marker_color=colors[i],
        textposition=text,
        text=rating,
        insidetextanchor='middle'), 2, 1)
    i += 1


position_to_small_ratings_single_account = calcuate_position_outside_annotations(binned_ratings_single_account_percentage, list_of_ratings, threshold)

for rating in position_to_small_ratings_single_account:
    fig.add_annotation(x=position_to_small_ratings_single_account[rating], y=0,
                        text=rating,
                        showarrow=False,
                        yshift=51,
                        xshift=0,
                        xref='x1',
                        yref='y1')   

position_to_small_ratings_without_single_account = calcuate_position_outside_annotations(binned_ratings_without_single_accounts_percentage, list_of_ratings, threshold)

for rating in position_to_small_ratings_without_single_account:
    fig.add_annotation(x=position_to_small_ratings_without_single_account[rating], y=0,
                        text=rating,
                        showarrow=False,
                        yshift=51,
                        xshift=0,
                        xref='x1',
                        yref='y2')   

fig.update_layout(barmode='stack')
fig.show()

In [257]:
calcuate_position_outside_annotations(binned_ratings_single_account_percentage, list_of_ratings, threshold)

{1.5: 0.07165250193353496}

In [52]:
# Create the data for the chart
x = ['Category 1', 'Category 2', 'Category 3']
y = [[20, 30, 50], [10, 40, 50]]

# Create the figure and the stacked bar chart
fig = go.Figure()
fig.add_trace(go.Bar(x=x, y=y[0], name='Subcategory 1', text=['20', '30', '50']))
fig.add_trace(go.Bar(x=x, y=y[1], name='Subcategory 2', text=['10', '40', '50']))

# Configure the layout
fig.update_layout(barmode='stack')

# Display the figure
fig.show()

In [55]:
import plotly.graph_objs as go

# Create lists for x and y values
x_values = ['Label 1', 'Label 2', 'Label 3']
y_values = [5, 10, 15]

# Create a list of colors for the bars
colors = ['#FF0000', '#00FF00', '#0000FF']

# Create a trace for the bar chart
trace = go.Bar(x=x_values, y=y_values, orientation='h', marker_color=colors)

# Create a layout for the chart
layout = go.Layout(title='Bar Chart', xaxis_title='Labels', yaxis_title='Values')

# Create a figure object
fig = go.Figure(data=[trace], layout=layout)

# Display the chart
fig.show()


In [57]:
import plotly.graph_objects as go

# Data for the bar chart
y = ["Category 1", "Category 2", "Category 3"]
x = [100, 200, 300]
labels = ["Label 1", "Label 2", "Label 3"]

# Create the bar chart
fig = go.Figure(data=[go.Bar(x=x, y=y, text=labels, textposition="inside", texttemplate="%{label}", orientation="h")])

# Customize the layout and show the chart
fig.update_layout(barmode="stack")
fig.show()


In [77]:
fig = go.Figure()

y = ['Category 1', 'Category 2', 'Category 3']
x = [[20, 30, 50], [10, 20, 30], [5, 10, 15]]
names = ['Stack 1', 'Stack 2', 'Stack 3']

for i in range(3):
    fig.add_trace(go.Bar(x=x[i], name=names[i], orientation='h'))

annotations = []
for i, x_val in enumerate(x):
    for j, y_val in enumerate(y[i]):
        annotation = go.Annotation(x=x_val, y=y_val, text=y_val, xref="x", yref="y", showarrow=False)
        annotations.append(annotation)

fig.update_layout(annotations=annotations)

fig.show()



plotly.graph_objs.Annotation is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Annotation
  - plotly.graph_objs.layout.scene.Annotation




In [107]:
trace1 = go.Bar(
    x=[1, 2, 3],
    y=[4, 5, 6],
    name='Bar 1'
)

trace2 = go.Bar(
    x=[1, 2, 3],
    y=[7, 8, 9],
    name='Bar 2'
)

subplot = make_subplots(rows=1, cols=2, shared_yaxes=True)

subplot.add_trace(trace1)
subplot.add_trace(trace2)

In [109]:
x = ['A', 'B', 'C']
y = [1, 2, 3]

data = [go.Bar(    x = x,    y = y)]

layout = go.Layout(title = 'Bar Chart')

fig = go.Figure(data = data, layout = layout)

plotly.plot(fig, filename = 'bar-chart.html')

# Add values to the bar chart
data[0]['text'] = y
data[0]['textposition'] = 'auto'

plotly.update(fig, data, layout)


NameError: name 'plotly' is not defined

In [168]:
#fig = make_subplots(rows=2, cols=1, shared_yaxes=True)
fig = go.Figure(go.Bar())
y = pd.Series(['Ratings from Users with only one rating'])
#fig.update_layout(barmode='stack')

fig.add_trace(go.Bar(y=y, x=[4, 5, 6],
                    marker=dict(color=[4, 5, 6], coloraxis="coloraxis"), orientation='h'))
#fig.update_layout(barmode='overlay')

#fig.add_trace(go.Bar(y=[1, 2, 3], x=[2, 3, 5],
#                    marker=dict(color=[2, 3, 5], coloraxis="coloraxis"), orientation='h'),
#              2, 1)
#fig.update_layout(barmode='stack')
#fig.update_layout(coloraxis=dict(colorscale='Bluered_r'), showlegend=False)#, barmode='stack')
fig.update_layout(barmode='stack')
fig.show()

In [197]:
fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
fig.update_layout(barmode='stack')

fig.add_trace(go.Bar(y=[1], x=[4, 5, 6],
                    marker=dict(color=[4, 5, 6], coloraxis="coloraxis",),  orientation='h'),
              1, 1)
fig.update_layout(barmode='stack')
fig.add_trace(go.Bar(y=[1], x=[2],
                    marker=dict(color=[2, 3, 5], coloraxis="coloraxis"), orientation='h'),
              2, 1)
fig.add_trace(go.Bar(y=[1], x=[3],
                    marker=dict(color=[2, 3, 5], coloraxis="coloraxis"), orientation='h'),
              2, 1)

fig.update_layout(coloraxis=dict(colorscale='Bluered_r'), showlegend=False)
fig.update_layout(barmode='stack')
fig.show()

In [178]:
wide_df = px.data.medals_wide()

wide_df = wide_df.drop([1, 2], axis=0)
wide_df

Unnamed: 0,nation,gold,silver,bronze
0,South Korea,24,13,11
