### Fix Names in a Table

In [3]:
import pandas as pd
import pandasql as ps

# Sample rows for the "Fix Names in a Table" task: (user_id, name) pairs whose
# names have inconsistent letter casing.
data = [
    [1, 'aLice'],
    [2, 'bOB'],
]

# Build the frame first, then pin the column dtypes explicitly
# (nullable Int64 for the id, plain object for the text column).
Users = pd.DataFrame(data, columns=['user_id', 'name'])
Users = Users.astype({'user_id': 'Int64', 'name': 'object'})
print(Users)

   user_id   name
0        1  aLice
1        2    bOB


In [8]:
# Normalise the casing in place: str.capitalize() upper-cases the first
# character of each name and lower-cases the remainder.
Users['name'] = Users.loc[:, 'name'].str.capitalize()

# Show the corrected table ordered by user id.
print(Users.sort_values(by='user_id', ascending=True))

   user_id   name
0        1  Alice
1        2    Bob


In [14]:
# Both queries upper-case only the first character of `name` and lower-case
# the remainder, so raw mixed-case input such as 'aLice' / 'bOB' is turned
# into 'Alice' / 'Bob' by the SQL itself.

# MySQL flavour (kept for reference; it is not executed in this notebook).
# substr(name, 1, 1) takes exactly one character; without the length argument
# the whole name would be upper-cased and then the tail appended again.
mysql_query = '''
select user_id, concat(upper(substr(name,1,1)), lower(substr(name,2))) as name
from Users
order by user_id
'''

# SQLite flavour: `||` is the string concatenation operator.
sqlite_query = '''
select user_id, upper(substr(name,1,1)) || lower(substr(name,2)) as name
from Users
order by user_id
'''

print(ps.sqldf(sqlite_query, locals()))

   user_id   name
0        1  Alice
1        2    Bob


### Find Users With Valid E-Mails

In [4]:
# Sample rows for the "Find Users With Valid E-Mails" task:
# (user_id, name, mail) triples, some with malformed addresses.
data = [
    [1, 'Winston', 'winston@leetcode.com'],
    [2, 'Jonathan', 'jonathanisgreat'],
    [3, 'Annabelle', 'bella-@leetcode.com'],
    [4, 'Sally', 'sally.come@leetcode.com'],
    [5, 'Marwan', 'quarz#2020@leetcode.com'],
    [6, 'David', 'david69@gmail.com'],
    [7, 'Shapiro', '.shapo@leetcode.com'],
]

# Build the frame, then pin the column dtypes explicitly.
Users = pd.DataFrame(data, columns=['user_id', 'name', 'mail'])
Users = Users.astype({'user_id': 'int64', 'name': 'object', 'mail': 'object'})
print(Users)

   user_id       name                     mail
0        1    Winston     winston@leetcode.com
1        2   Jonathan          jonathanisgreat
2        3  Annabelle      bella-@leetcode.com
3        4      Sally  sally.come@leetcode.com
4        5     Marwan  quarz#2020@leetcode.com
5        6      David        david69@gmail.com
6        7    Shapiro      .shapo@leetcode.com


In [6]:
# Keep rows whose mail is a letter followed by letters, digits, '_', '.' or
# '-', then exactly '@leetcode.com'.
# fullmatch() requires the whole string to match; a '$' anchor would also
# accept an address followed by a trailing newline.
# na=False treats a missing mail as invalid instead of putting NaN in the
# boolean mask, which would make the row selection fail.
valid_mail = Users['mail'].str.fullmatch(r'[A-Za-z][A-Za-z0-9_\.\-]*@leetcode\.com', na=False)
Users[valid_mail].reset_index(drop=True)

Unnamed: 0,user_id,name,mail
0,1,Winston,winston@leetcode.com
1,3,Annabelle,bella-@leetcode.com
2,4,Sally,sally.come@leetcode.com


In [7]:
# Same e-mail rule expressed in SQL: a leading letter, then letters, digits,
# '_', '.' or '-', followed by exactly '@leetcode.com'.
# The query is a raw string so the regex backslashes ('\.' and '\-') reach the
# SQL engine verbatim without relying on Python's handling of unrecognised
# escape sequences, which emits a SyntaxWarning on recent Python versions.
query = r'''
select user_id, name, mail
from Users
where mail regexp '^[A-Za-z][A-Za-z0-9_\.\-]*@leetcode\.com$'
'''

ps.sqldf(query, locals())

Unnamed: 0,user_id,name,mail
0,1,Winston,winston@leetcode.com
1,3,Annabelle,bella-@leetcode.com
2,4,Sally,sally.come@leetcode.com


### Patients With a Condition

In [9]:
# Sample rows for the "Patients With a Condition" task:
# (patient_id, patient_name, conditions), where `conditions` holds zero or
# more space-separated condition codes.
data = [
    [1, 'Daniel', 'YFEV COUGH'],
    [2, 'Alice', ''],
    [3, 'Bob', 'DIAB100 MYOP'],
    [4, 'George', 'ACNE DIAB100'],
    [5, 'Alain', 'DIAB201'],
]

# Build the frame, then pin the column dtypes explicitly.
Patients = pd.DataFrame(data, columns=['patient_id', 'patient_name', 'conditions'])
Patients = Patients.astype({'patient_id': 'int64', 'patient_name': 'object', 'conditions': 'object'})
print(Patients)

   patient_id patient_name    conditions
0           1       Daniel    YFEV COUGH
1           2        Alice              
2           3          Bob  DIAB100 MYOP
3           4       George  ACNE DIAB100
4           5        Alain       DIAB201


In [15]:
# Select patients having a condition code that begins with 'DIAB1'.
# Codes are space-separated, so the prefix must sit at the start of the string
# or right after a space; a plain substring search would also report codes
# that merely contain 'DIAB1' in the middle (e.g. 'SADIAB100').
# NOTE(review): assumes the task is "code starts with DIAB1" - confirm.
# The group is non-capturing so pandas does not warn about match groups.
print(Patients[Patients['conditions'].str.contains(r'(?:^| )DIAB1')].reset_index(drop=True))

   patient_id patient_name    conditions
0           3          Bob  DIAB100 MYOP
1           4       George  ACNE DIAB100


In [27]:
# str.match() variant of the same filter. match() anchors at the start of the
# string, so the pattern accepts either 'DIAB1' right at the start or any
# prefix that ends with a space immediately before 'DIAB1'. This prevents
# codes that only contain 'DIAB1' mid-token (e.g. 'SADIAB100') from matching.
# NOTE(review): assumes the task is "code starts with DIAB1" - confirm.
# The original row index is kept on purpose (no reset_index here).
Patients[Patients['conditions'].str.match(r'(?:.* )?DIAB1')]

Unnamed: 0,patient_id,patient_name,conditions
2,3,Bob,DIAB100 MYOP
3,4,George,ACNE DIAB100


In [29]:
# SQL version of the filter: a condition code must begin with 'DIAB1'.
# Codes are space-separated, so the prefix is accepted either at the very
# start of `conditions` or directly after a space. A bare '%DIAB1%' would also
# match codes that merely contain 'DIAB1' mid-token (e.g. 'SADIAB100').
# NOTE(review): assumes the task is "code starts with DIAB1" - confirm.
query = '''
select *
from Patients
where conditions like 'DIAB1%'
   or conditions like '% DIAB1%'
'''

ps.sqldf(query, locals())

Unnamed: 0,patient_id,patient_name,conditions
0,3,Bob,DIAB100 MYOP
1,4,George,ACNE DIAB100
