In [61]:
from checkdigit import upc
import pandas as pd

In [62]:
# Input file: a random sample of UPCs taken from https://www.upcitemdb.com/
# The path is relative, so the CSV must be reachable from the working directory.
data = "example_upc.csv"

In [63]:
# Load the sample UPCs.
# The `upc` column is read as text rather than as a number: UPCs are
# identifiers, and integer parsing would silently drop any leading zeros and
# so change the code's length, which the trimming/padding cells depend on.
df = pd.read_csv(data, dtype={'upc': str})

In [64]:
# Inspect the frame right after loading.
# NOTE(review): the output has an `Unnamed: 0` column holding 0..9 — presumably
# a row index that was saved into the CSV; confirm, and consider `index_col=0`.
df

Unnamed: 0,upc
0,4055678528871
1,72945611900
2,13764028067
3,72250037129
4,73472001905
5,652729104134
6,24321930334
7,9542023782
8,31290113040
9,3282770075502


In [65]:
# Strip the final character (the check digit position) from every UPC.
# Each value is passed through str() first, so numeric columns work too.
trim = df['upc'].map(lambda code: str(code)[:-1]).tolist()
df['trim_upc'] = trim

In [66]:
# Keep only the final character of every UPC: the check digit as it is
# currently recorded in the input.
current_check_digit = df['upc'].map(lambda code: str(code)[-1:]).tolist()
df['current_check_digit'] = current_check_digit

In [67]:
# Record how many characters each trimmed UPC has; the padding step
# branches on this length.
trim_len = df['trim_upc'].map(lambda text: len(str(text))).tolist()
df['trim_len'] = trim_len

# Make sure the trimmed codes are stored as strings.
df['trim_upc'] = df['trim_upc'].astype(str)

In [68]:
# Check the derived columns: trimmed code, current check digit, trimmed length.
df

Unnamed: 0,upc,trim_upc,current_check_digit,trim_len
0,4055678528871,405567852887,1,12
1,72945611900,7294561190,0,10
2,13764028067,1376402806,7,10
3,72250037129,7225003712,9,10
4,73472001905,7347200190,5,10
5,652729104134,65272910413,4,11
6,24321930334,2432193033,4,10
7,9542023782,954202378,2,9
8,31290113040,3129011304,0,10
9,3282770075502,328277007550,2,12


https://en.wikipedia.org/wiki/Universal_Product_Code

Account for multiple UPC types by left-padding the trimmed UPC (check digit removed) with zeros:

- if the trimmed UPC length is >= 12, pad to 13 digits
- if the trimmed UPC length is < 12, pad to 11 digits

In [69]:
# Left-pad every trimmed code with zeros: codes that are already 12 or more
# characters long are padded to 13, all shorter ones to 11.
cond_upc = [
    code.zfill(13 if len(str(code)) >= 12 else 11)
    for code in df['trim_upc']
]

In [70]:
# Store the zero-padded codes alongside the originals; the list is assigned
# positionally, one entry per row.
df['pad_upc'] = cond_upc

In [71]:
# Inspect the padded codes.
# NOTE(review): the recorded output shows `pad_upc` values of 9-12 characters,
# i.e. without the leading zeros that zfill(11)/zfill(13) would add — possibly
# an export/rendering artifact; re-run and confirm the output is current.
df

Unnamed: 0,upc,trim_upc,current_check_digit,trim_len,pad_upc
0,4055678528871,405567852887,1,12,405567852887
1,72945611900,7294561190,0,10,7294561190
2,13764028067,1376402806,7,10,1376402806
3,72250037129,7225003712,9,10,7225003712
4,73472001905,7347200190,5,10,7347200190
5,652729104134,65272910413,4,11,65272910413
6,24321930334,2432193033,4,10,2432193033
7,9542023782,954202378,2,9,954202378
8,31290113040,3129011304,0,10,3129011304
9,3282770075502,328277007550,2,12,328277007550


In [72]:
# Compute a check digit for every padded code with checkdigit's UPC helper.
df['correct_check_digit'] = df['pad_upc'].map(upc.upc_calculate).tolist()

# Append that check digit to the padded code to form the corrected UPC.
df['correct_upc'] = df['pad_upc'].add(df['correct_check_digit'])

In [73]:
# Compare the recomputed check digit and corrected UPC against the input.
# NOTE(review): in the recorded output `correct_upc` equals `upc` for every row
# and carries no leading zeros, although it is built from the zero-padded
# `pad_upc` — confirm whether the displayed values lost their padding.
df

Unnamed: 0,upc,trim_upc,current_check_digit,trim_len,pad_upc,correct_check_digit,correct_upc
0,4055678528871,405567852887,1,12,405567852887,1,4055678528871
1,72945611900,7294561190,0,10,7294561190,0,72945611900
2,13764028067,1376402806,7,10,1376402806,7,13764028067
3,72250037129,7225003712,9,10,7225003712,9,72250037129
4,73472001905,7347200190,5,10,7347200190,5,73472001905
5,652729104134,65272910413,4,11,65272910413,4,652729104134
6,24321930334,2432193033,4,10,2432193033,4,24321930334
7,9542023782,954202378,2,9,954202378,2,9542023782
8,31290113040,3129011304,0,10,3129011304,0,31290113040
9,3282770075502,328277007550,2,12,328277007550,2,3282770075502


In [74]:
# Optional export: uncomment to write the corrected UPCs to correct_upc.csv
# df['correct_upc'].to_csv('correct_upc.csv',index=False)