# List total online timespan of servers 

In [1]:
import pandas as pd
from datetime import date

In [2]:
df = pd.DataFrame({'server_id': [1,1,2,2,3,3,4,4,4,4,5],
                      'repair_status': ['repair', 'online','repair', 'online','repair', 'online','repair', 'online', 'repair', 'online', 'repair'],
                   'as_of_date':['2020-01-01','2020-01-08','2020-01-05','2020-01-11','2020-01-10','2020-01-22','2020-02-02','2020-02-04','2020-02-10','2020-02-16','2020-03-01']})
df['date']=pd.to_datetime(df['as_of_date'])
df

Unnamed: 0,server_id,repair_status,as_of_date,date
0,1,repair,2020-01-01,2020-01-01
1,1,online,2020-01-08,2020-01-08
2,2,repair,2020-01-05,2020-01-05
3,2,online,2020-01-11,2020-01-11
4,3,repair,2020-01-10,2020-01-10
5,3,online,2020-01-22,2020-01-22
6,4,repair,2020-02-02,2020-02-02
7,4,online,2020-02-04,2020-02-04
8,4,repair,2020-02-10,2020-02-10
9,4,online,2020-02-16,2020-02-16


In [3]:
df_re = df.loc[df['repair_status']=='repair',['server_id','date']]
df_on = df.loc[df['repair_status']=='online',['server_id','date']]

df_re['rn'] = df_re.groupby('server_id')['date'].rank(method='first')
df_on['rn'] = df_on.groupby('server_id')['date'].rank(method='first')

df2 = pd.merge(df_re, df_on, how='left', on=['server_id','rn'])
df2 = df2.fillna(pd.to_datetime('today'))
df2['days'] = (df2['date_y']-df2['date_x']).dt.days
df2

Unnamed: 0,server_id,date_x,rn,date_y,days
0,1,2020-01-01,1.0,2020-01-08 00:00:00.000000,7
1,2,2020-01-05,1.0,2020-01-11 00:00:00.000000,6
2,3,2020-01-10,1.0,2020-01-22 00:00:00.000000,12
3,4,2020-02-02,1.0,2020-02-04 00:00:00.000000,2
4,4,2020-02-10,2.0,2020-02-16 00:00:00.000000,6
5,5,2020-03-01,1.0,2021-05-20 17:02:00.140418,445


In [4]:
df3 = df2.groupby('server_id')['days'].sum()
pd.DataFrame(df3).reset_index()

Unnamed: 0,server_id,days
0,1,7
1,2,6
2,3,12
3,4,8
4,5,445


In [5]:
# SQL solution

'''

CREATE TABLE IF NOT EXISTS Repair (
    Id INT,
    r_status VARCHAR(50),
    as_date VARCHAR(50)
);

DELETE FROM Repair;

INSERT INTO Repair VALUES
(1,        'repair',        '2020-01-01'),
(1,        'online',        '2020-01-08'),
(2,        'repair',        '2020-01-05'),
(2,        'online',        '2020-01-11'),
(3,        'repair',        '2020-01-10'),
(3,        'online',   '2020-01-22'),
(4,        'repair',        '2020-02-02'),
(4,        'online',        '2020-02-04'),
(4,        'repair',        '2020-02-10'),
(4,        'online',        '2020-02-16'),
(5,        'repair',        '2020-03-01');

Select a.Id as server_id, sum(ifnull(datediff(b.as_date,a.as_date),datediff(curdate(),a.as_date))) as repair_days From
(select *,ROW_NUMBER() OVER(Partition by Id Order BY as_date) rn from Repair where r_status = 'repair')  a 
 left join
(select *,ROW_NUMBER() OVER(Partition by Id Order BY as_date) rn from Repair where r_status = 'online')  b
on a.Id = b.Id and a.rn = b.rn
group by a.Id

Output
server_id        repair_days
1        7
2        6
3        12
4        8
5        218

'''

"\n\nCREATE TABLE IF NOT EXISTS Repair (\n\xa0 \xa0 Id INT,\n\xa0 \xa0 r_status VARCHAR(50),\n\xa0 \xa0 as_date VARCHAR(50)\n);\n\nDELETE FROM Repair;\n\nINSERT INTO Repair VALUES\n(1,\xa0 \xa0 \xa0 \xa0 'repair',\xa0 \xa0 \xa0 \xa0 '2020-01-01'),\n(1,\xa0 \xa0 \xa0 \xa0 'online',\xa0 \xa0 \xa0 \xa0 '2020-01-08'),\n(2,\xa0 \xa0 \xa0 \xa0 'repair',\xa0 \xa0 \xa0 \xa0 '2020-01-05'),\n(2,\xa0 \xa0 \xa0 \xa0 'online',\xa0 \xa0 \xa0 \xa0 '2020-01-11'),\n(3,\xa0 \xa0 \xa0 \xa0 'repair',\xa0 \xa0 \xa0 \xa0 '2020-01-10'),\n(3,\xa0 \xa0 \xa0 \xa0 'online',\xa0 \xa0'2020-01-22'),\n(4,\xa0 \xa0 \xa0 \xa0 'repair',\xa0 \xa0 \xa0 \xa0 '2020-02-02'),\n(4,\xa0 \xa0 \xa0 \xa0 'online',\xa0 \xa0 \xa0 \xa0 '2020-02-04'),\n(4,\xa0 \xa0 \xa0 \xa0 'repair',\xa0 \xa0 \xa0 \xa0 '2020-02-10'),\n(4,\xa0 \xa0 \xa0 \xa0 'online',\xa0 \xa0 \xa0 \xa0 '2020-02-16'),\n(5,\xa0 \xa0 \xa0 \xa0 'repair',\xa0 \xa0 \xa0 \xa0 '2020-03-01');\n\nSelect a.Id as server_id, sum(ifnull(datediff(b.as_date,a.as_date),datediff(curd