In [1]:
import pandas as pd

In [3]:
# Column labels applied to the CSV in place of whatever header the file carries.
colunas: list[str] = [
    "erros",
    "mapas",
    "data",
    "mapas_avaria",
]

# skiprows=1 drops the first line of the file (presumably its own header row —
# confirm against data.csv); rows containing any missing value are then discarded.
df: pd.DataFrame = (
    pd.read_csv("data.csv", names=colunas, skiprows=1)
    .dropna()
)

df.head()

Unnamed: 0,erros,mapas,data,mapas_avaria
22,9.0,52.0,2024-04-02 00:00:00,"17,31%"
23,20.0,48.0,2024-04-03 00:00:00,"41,67%"
24,18.0,55.0,2024-04-04 00:00:00,"32,73%"
25,31.0,53.0,2024-04-05 00:00:00,"58,49%"
26,34.0,46.0,2024-04-06 00:00:00,"73,91%"


In [4]:
# Check column dtypes and non-null counts right after loading.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 104 entries, 22 to 152
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   erros         104 non-null    float64
 1   mapas         104 non-null    float64
 2   data          104 non-null    object 
 3   mapas_avaria  104 non-null    object 
dtypes: float64(2), object(2)
memory usage: 4.1+ KB


In [5]:
# Convert percentage strings written with a decimal comma (e.g. "17,31%") to floats (17.31).
# - astype(str) keeps the cell re-runnable: on a second execution the column is
#   already float and the .str accessor would otherwise raise AttributeError.
#   (Missing values were dropped at load time, so no "nan" strings are produced.)
# - regex=False makes both replacements literal, independent of the pandas
#   version's default for the `regex` argument.
df['mapas_avaria'] = (
    df['mapas_avaria']
    .astype(str)
    .str.replace('%', '', regex=False)
    .str.replace(',', '.', regex=False)
    .astype(float)
)

In [6]:
# Preview the frame to confirm mapas_avaria now holds plain floats.
df.head()

Unnamed: 0,erros,mapas,data,mapas_avaria
22,9.0,52.0,2024-04-02 00:00:00,17.31
23,20.0,48.0,2024-04-03 00:00:00,41.67
24,18.0,55.0,2024-04-04 00:00:00,32.73
25,31.0,53.0,2024-04-05 00:00:00,58.49
26,34.0,46.0,2024-04-06 00:00:00,73.91


In [7]:
# Parse the `data` column into pandas datetimes so the .dt accessor can be used on it.
df['data'] = pd.to_datetime(df['data'])

In [8]:
# Preview the frame to confirm the `data` column was parsed as dates.
df.head()

Unnamed: 0,erros,mapas,data,mapas_avaria
22,9.0,52.0,2024-04-02,17.31
23,20.0,48.0,2024-04-03,41.67
24,18.0,55.0,2024-04-04,32.73
25,31.0,53.0,2024-04-05,58.49
26,34.0,46.0,2024-04-06,73.91


In [9]:
# ISO calendar week number of each date.
# NOTE(review): the week number carries no year, so equal week numbers from
# different years would share the same value — confirm the data spans one year.
df['semana'] = df['data'].dt.isocalendar().week

In [10]:
# Preview the frame with the new `semana` column.
df.head()

Unnamed: 0,erros,mapas,data,mapas_avaria,semana
22,9.0,52.0,2024-04-02,17.31,14
23,20.0,48.0,2024-04-03,41.67,14
24,18.0,55.0,2024-04-04,32.73,14
25,31.0,53.0,2024-04-05,58.49,14
26,34.0,46.0,2024-04-06,73.91,14


In [11]:
# Collapse the per-date rows into one row per week: the two count columns are
# summed, while the percentage column is averaged.
df: pd.DataFrame = (
    df.groupby('semana', as_index=False)
    .agg(
        erros=('erros', 'sum'),
        mapas=('mapas', 'sum'),
        mapas_avaria=('mapas_avaria', 'mean'),
    )
)

In [12]:
# Preview the weekly aggregate.
df.head()

Unnamed: 0,semana,erros,mapas,mapas_avaria
0,14,112.0,254.0,44.822
1,15,145.0,263.0,54.536
2,16,65.0,155.0,42.88
3,18,38.0,54.0,70.37
4,19,176.0,280.0,62.168


In [13]:
# Weekly share of errors over maps, as a percentage rounded to two decimals.
df['erros_percentual'] = df['erros'].div(df['mapas']).mul(100).round(2)

# Pooled proportion over all weeks: total errors divided by total maps.
probabilidade: float = (
    df['erros'].sum()
    / df['mapas'].sum()
)

In [14]:
def limites_controle(observacoes: int, conformidade: int, p: float = 0.0, tipo: int = 1) -> tuple[float, float, float]:
    """Compute the 3-sigma control limits of a proportion (p) chart for one subgroup.

    Args:
        observacoes: Subgroup size (number of inspected items). Must be positive.
        conformidade: Item count used to estimate ``p`` when ``p`` is not supplied.
        p: Centre-line proportion, in [0, 1]. The default ``0.0`` is a sentinel
            meaning "estimate p from the counts"; an explicit ``0.0`` is therefore
            also re-estimated.
        tipo: Only consulted when ``p`` is estimated. ``1`` computes
            ``(observacoes - conformidade) / observacoes``; any other value
            computes ``conformidade / observacoes``.

    Returns:
        ``(lic, p, lsc)``: lower control limit, centre line and upper control
        limit. Both limits are clamped to [0, 1] because a proportion cannot lie
        outside that range.

    Raises:
        ValueError: If ``observacoes`` is not positive, or if the supplied or
            estimated ``p`` falls outside [0, 1].
    """
    if observacoes <= 0:
        raise ValueError(f"observacoes must be positive, got {observacoes!r}")

    if p == 0.0:
        p = (observacoes - conformidade) / observacoes if tipo == 1 else conformidade / observacoes

    # Outside [0, 1] the variance term p * (1 - p) is negative and the square
    # root below would silently become complex (or NaN for numpy scalars).
    if not 0.0 <= p <= 1.0:
        raise ValueError(f"p must lie in [0, 1], got {p!r}")

    desvio: float = (p * (1 - p) * (1 / observacoes)) ** (1 / 2)

    # Small subgroups or extreme p push the raw 3-sigma band past the valid
    # range of a proportion, so clamp it.
    lic: float = max(0.0, p - 3 * desvio)
    lsc: float = min(1.0, p + 3 * desvio)

    return lic, p, lsc

In [15]:
# Attach the control limits to every week: each row yields (lic, p, lsc), which
# result_type='expand' spreads over the three new columns.
# The centre line is the pooled proportion `probabilidade` (erros / mapas), so
# every week shares the same p and only the limits vary with the weekly `mapas`.
# tipo=2 only matters for the fallback inside limites_controle (taken when
# p == 0.0): it makes the fallback compute erros / mapas, matching
# `probabilidade`, whereas tipo=1 would compute the complement
# (mapas - erros) / mapas.
df[['lic', 'p', 'lsc']] = df.apply(
    lambda linha: limites_controle(
        linha['mapas'],
        linha['erros'],
        p=probabilidade,
        tipo=2,
    ),
    axis=1,
    result_type='expand',
)

In [16]:
# Show the weekly table together with its control limits (lic, p, lsc).
df

Unnamed: 0,semana,erros,mapas,mapas_avaria,erros_percentual,lic,p,lsc
0,14,112.0,254.0,44.822,44.09,0.383025,0.477044,0.571064
1,15,145.0,263.0,54.536,55.13,0.384648,0.477044,0.569441
2,16,65.0,155.0,42.88,41.94,0.356689,0.477044,0.5974
3,18,38.0,54.0,70.37,70.37,0.273136,0.477044,0.680953
4,19,176.0,280.0,62.168,62.86,0.387497,0.477044,0.566592
5,20,115.0,226.0,50.8525,50.88,0.377371,0.477044,0.576718
6,21,85.0,262.0,34.09,32.44,0.384472,0.477044,0.569617
7,22,163.0,251.0,79.438,64.94,0.382465,0.477044,0.571624
8,23,181.0,263.0,68.62,68.82,0.384648,0.477044,0.569441
9,24,117.0,260.0,44.554,45.0,0.384117,0.477044,0.569972
